/*******************************************************************************
*    Stimulant or depressant? Resource-related income shocks and conflict      *
*                                                                              *
*                  Data Preparation - Public Version                           *
********************************************************************************


Author:   Kai Gehring, Sarah Langlotz, Stefan Kienberger
Email:    sarah.langlotz@uni-goettingen.de
Preamble: This do-file creates a strongly balanced dataset
(GLK_final_analysis_external.dta), in which observations are uniquely identified
by the variables districtcode and year, as well as a household-level dataset
(finaldata_hhlevel.dta) from the NRVA survey. Note that we cannot share the
NRVA and SIGACTS data in their raw form for proprietary reasons (see README).
In this do-file we replaced the SIGACTS data with randomized numbers. We also
commented out the code that creates "finaldata_hhlevel.dta" and
"NRVA2003_languages.dta"; we provide both files in the Harvard Dataverse because
they are needed to create the final output.
*******************************************************************************/

*------------------------------------------------------------------------------*
* Program Setup
*------------------------------------------------------------------------------*

* Start from an empty session: no data, no Mata objects, no matrices.
drop _all
clear mata
clear matrix

* Session limits requested by this do-file.
set matsize 10000
set maxvar 10000

* Everything below is interpreted under Stata 17.0 version control.
version 17.0

*------------------------------------------------------------------------------*

*------------------------------------------------------------------------------*
* Commands to be installed (uncomment in first run)
*------------------------------------------------------------------------------*

/*
cap ado uninstall unique
net install unique, from(http://fmwww.bc.edu/RePEc/bocode/u)
cap ado uninstall mdesc
net install mdesc, from(http://fmwww.bc.edu/RePEc/bocode/m)
*/


*------------------------------------------------------------------------------*

*------------------------------------------------------------------------------*
* Directories (set before first run, otherwise you receive an "invalid syntax" error)
*------------------------------------------------------------------------------*

* Root folder of the replication package. The line below is deliberately
* incomplete: it stops the do-file with "invalid syntax" until a path is filled in.
local DIR = //Insert your directory here 

* Change to that folder. A failure is no longer swallowed silently: if a
* non-empty DIR cannot be entered, stop here instead of reading and writing
* files relative to whatever directory Stata happens to be in.
capture cd "`DIR'"
local cd_rc = _rc
if `cd_rc' != 0 & `"`DIR'"' != "" {
	display as error `"could not change to directory `DIR' (return code `cd_rc')"'
	exit `cd_rc'
}

*------------------------------------------------------------------------------*


*------------------------------------------------------------------------------*
* File requirements for smooth replication
*------------------------------------------------------------------------------*

/*after downloading the data from the Harvard Dataverse, create the following
three folders:
- "processed"
- "graphs"
- "tables"
*/


*------------------------------------------------------------------------------*

/*******************************************************************************
*Generation of a file that contains all important district and province variables necessary for merging*
*******************************************************************************/


* Build processed/temp.dta: one row per spelling variant (PossibleSpellings)
* with the province, district code and district name it belongs to.
import excel "mergefile_districtnames_all.xlsx", sheet("Tabelle1") firstrow clear
drop E-L

* Rows without a spelling cannot be used as a merge key.
drop if missing(PossibleSpellings)

* Keep a single row per spelling-province pair.
duplicates drop PossibleSpellings ProvinceName, force

destring DistrictCode, replace

rename (ProvinceName DistrictCode DistrictName) (province districtcode district)

* Forward slashes are understood by Stata on every platform.
save "processed/temp.dta", replace
	
*Compare to shapefile district names codes and spellings
* Output: processed/districtnames.dta, linking the shapefile identifiers
* (OBJECTID) to the district codes and names stored in processed/temp.dta.
import delimited "districts_fromshp398.csv", clear
save "processed/districts_fromshp398.dta", replace

* The lookup is keyed on lower-case spellings, so lower-case both name columns.
rename dist_34_na PossibleSpellings
replace PossibleSpellings=lower(PossibleSpellings)
replace prov_34_na=lower(prov_34_na)

tab distid

* District names that occur in more than one province (or in several spellings)
* get the province appended so that they match the keys used in temp.dta.
* prov_34_na is lower case at this point, so only lower-case province names can match.
replace PossibleSpellings="arghandab (kandahar)" if PossibleSpellings=="arghandab" & prov_34_na=="kandahar"
replace PossibleSpellings="arghandab (zabul)" if PossibleSpellings=="arghandab" & prov_34_na=="zabul"
replace PossibleSpellings="baharak (badakhshan)" if PossibleSpellings=="baharak" & prov_34_na=="badakhshan"
replace PossibleSpellings="baharak (takhar)" if PossibleSpellings=="baharak" & prov_34_na=="takhar"
replace PossibleSpellings="dawlatabad (balkh)" if PossibleSpellings=="dawlatabad" & prov_34_na=="balkh"
replace PossibleSpellings="dawlatabad (faryab)" if PossibleSpellings=="dawlatabad" & prov_34_na=="faryab"
replace PossibleSpellings="dawlatabad (balkh)" if PossibleSpellings=="dawlat abad" & prov_34_na=="balkh"
replace PossibleSpellings="dawlatabad (faryab)" if PossibleSpellings=="dawlat abad" & prov_34_na=="faryab"
replace PossibleSpellings="fayzabad (badakhshan)" if PossibleSpellings=="fayzabad" & prov_34_na=="badakhshan"
replace PossibleSpellings="fayzabad (jawzjan)" if PossibleSpellings=="fayzabad" & prov_34_na=="jawzjan"
replace PossibleSpellings="jani khel (paktya)" if PossibleSpellings=="jani khail" & prov_34_na=="paktya"
replace PossibleSpellings="jani khel (paktika)" if PossibleSpellings=="jani khel" & prov_34_na=="paktika"
replace PossibleSpellings="kohistan (badakhshan)" if PossibleSpellings=="kohistan" & prov_34_na=="badakhshan"
replace PossibleSpellings="kohistan (faryab)" if PossibleSpellings=="kohistan" & prov_34_na=="faryab"
replace PossibleSpellings="muqur (ghazni)" if PossibleSpellings=="muqur" & prov_34_na=="ghazni"
replace PossibleSpellings="muqur (badghis)" if PossibleSpellings=="muqur" & prov_34_na=="badghis"
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qarabagh" & prov_34_na=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qara bagh" & prov_34_na=="ghazni"
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qara bagh" & prov_34_na=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qarrabagh" & prov_34_na=="ghazni"
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qarrabagh" & prov_34_na=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qarabagh" & prov_34_na=="ghazni"
replace PossibleSpellings="reg (hilmand)" if PossibleSpellings=="reg" & prov_34_na=="hilmand"
replace PossibleSpellings="reg (kandahar)" if PossibleSpellings=="registan" & prov_34_na=="kandahar"
replace PossibleSpellings="ishkamish (takhar)" if PossibleSpellings=="ishkamish" & prov_34_na=="takhar"
replace PossibleSpellings="ishkashim (badakhshan)" if PossibleSpellings=="ishkashiem" & prov_34_na=="badakhshan"
replace PossibleSpellings="zinda jan" if PossibleSpellings=="zanda  jan"

* Attach district codes and names; lookup rows without a shapefile district are dropped.
merge 1:1 PossibleSpellings  using "processed/temp.dta"
drop if _merge==2
drop _merge


keep objectid province districtcode district PossibleSpellings prov_34_na

* district_alternative holds the spelling as it was before the harmonisation
* above (including the doubled blank in "zanda  jan").
gen district_alternative = PossibleSpellings
replace district_alternative = "jani khel" if PossibleSpellings == "jani khel (paktika)"
replace district_alternative = "arghandab" if PossibleSpellings == "arghandab (zabul)"
replace district_alternative = "registan" if PossibleSpellings == "reg (kandahar)"
replace district_alternative = "arghandab" if PossibleSpellings == "arghandab (kandahar)"
replace district_alternative = "zanda  jan" if PossibleSpellings == "zinda jan"
replace district_alternative = "muqur" if PossibleSpellings == "muqur (badghis)"
replace district_alternative = "dawlatabad" if PossibleSpellings == "dawlatabad (faryab)"
replace district_alternative = "kohistan" if PossibleSpellings == "kohistan (faryab)"
replace district_alternative = "fayzabad" if PossibleSpellings == "fayzabad (jawzjan)"
replace district_alternative = "dawlatabad" if PossibleSpellings == "dawlatabad (balkh)"
replace district_alternative = "baharak" if PossibleSpellings == "baharak (takhar)"
replace district_alternative = "ishkamish" if PossibleSpellings == "ishkamish (takhar)"
* NOTE(review): the trailing blank in "warduj " is kept from the original;
* presumably it mirrors a spelling in another source file - confirm.
replace district_alternative = "warduj " if PossibleSpellings == "warduj"
replace district_alternative = "kohistan" if PossibleSpellings == "kohistan (badakhshan)"
replace district_alternative = "baharak" if PossibleSpellings == "baharak (badakhshan)"
replace district_alternative = "ishkashiem" if PossibleSpellings == "ishkashim (badakhshan)"
replace district_alternative = "fayzabad" if PossibleSpellings == "fayzabad (badakhshan)"
replace district_alternative = "jani khail" if PossibleSpellings == "jani khel (paktya)"
replace district_alternative = "qarabagh" if PossibleSpellings == "qarabagh (ghazni)"
replace district_alternative = "muqur" if PossibleSpellings == "muqur (ghazni)"
replace district_alternative = "qarabagh" if PossibleSpellings == "qarabagh (kabul)"



rename prov_34_na province_alternative
rename objectid OBJECTID

* Diagnostic: number of distinct province-district combinations.
unique province_alternative district_alternative

destring OBJECTID, replace 

sort OBJECTID

* Running index in OBJECTID order.
gen OBJECTID_1 = _n

* Diagnostics: number of distinct values of each identifier.
unique districtcode
unique district
unique OBJECTID

order OBJECTID OBJECTID_1 districtcode district district_alternative PossibleSpellings province province_alternative 
sort districtcode

save "processed/districtnames.dta", replace



/*******************************************************************************
*                               Military bases                                 *
*******************************************************************************/

* Output: processed/camps.dta with one row per district: number of bases
* (camps), earliest opening year (minopened) and latest closing year (maxclosed).
import excel "Military Bases and Camps.xlsx", sheet("Military Bases and Camps") firstrow clear
rename District PossibleSpellings
drop if missing(PossibleSpellings)
replace PossibleSpellings=lower(PossibleSpellings)

* Harmonise district spellings with the keys used in temp.dta.
replace PossibleSpellings="dihrawud" if PossibleSpellings=="deh rawod"
replace PossibleSpellings="khash rod" if PossibleSpellings=="delaram" /*https://en.wikipedia.org/wiki/Delaram*/
replace PossibleSpellings="garmser" if PossibleSpellings=="garmsir"
replace PossibleSpellings="khost(matun)" if PossibleSpellings=="khost (matun)"
replace PossibleSpellings="mando zayi" if PossibleSpellings=="mandozayi"
replace PossibleSpellings="mazari sharif" if PossibleSpellings=="mazar-e sharif" | PossibleSpellings=="mazar-e-sharif"
replace PossibleSpellings="nahri sarraj" if PossibleSpellings=="nahri saraj" | PossibleSpellings=="nahri saraj "
replace PossibleSpellings="tirin kot" if PossibleSpellings=="tarin kot"

* Keep only bases whose district is found in the lookup.
merge m:1 PossibleSpellings using "processed/temp.dta" 
keep if _merge==3
drop _merge

* Missing closing year is set to 2015.
destring Closed, replace
replace Closed=2015 if Closed==.

* "Before/in YYYY" is coded as YYYY; a missing opening year is set to 2001.
replace Opened="2008" if Opened=="Before/in 2008"
replace Opened="2006" if Opened=="Before/in 2006"

destring Opened, replace
replace Opened=2001 if Opened==.

* One row per base, so summing camps counts the bases.
gen camps=1


bysort districtcode: egen minopened=min(Opened)
bysort districtcode: egen maxclosed=max(Closed)
collapse (sum) camps (last) district minopened maxclosed, by (districtcode)


save "processed/camps.dta", replace 


*********************To get more precise dates
*Open as year 
* Output: processed/camps_opened.dta with one row per district and opening
* year, holding the number of bases opened in that district-year (bases_opened).
import excel "Military Bases and Camps.xlsx", sheet("Military Bases and Camps") firstrow clear
rename District PossibleSpellings
drop if missing(PossibleSpellings)
replace PossibleSpellings=lower(PossibleSpellings)

* Harmonise district spellings with the keys used in temp.dta.
replace PossibleSpellings="dihrawud" if PossibleSpellings=="deh rawod"
replace PossibleSpellings="khash rod" if PossibleSpellings=="delaram" /*https://en.wikipedia.org/wiki/Delaram*/
replace PossibleSpellings="garmser" if PossibleSpellings=="garmsir"
replace PossibleSpellings="khost(matun)" if PossibleSpellings=="khost (matun)"
replace PossibleSpellings="mando zayi" if PossibleSpellings=="mandozayi"
replace PossibleSpellings="mazari sharif" if PossibleSpellings=="mazar-e sharif" | PossibleSpellings=="mazar-e-sharif"
replace PossibleSpellings="nahri sarraj" if PossibleSpellings=="nahri saraj" | PossibleSpellings=="nahri saraj "
replace PossibleSpellings="tirin kot" if PossibleSpellings=="tarin kot"

* Keep only bases whose district is found in the lookup.
merge m:1 PossibleSpellings using "processed/temp.dta" 
keep if _merge==3
drop _merge PossibleSpellings

* Missing closing year is set to 2015.
destring Closed, replace
replace Closed=2015 if Closed==.

* "Before/in YYYY" is coded as YYYY; a missing opening year is set to 2001.
replace Opened="2008" if Opened=="Before/in 2008"
replace Opened="2006" if Opened=="Before/in 2006"

destring Opened, replace
replace Opened=2001 if Opened==.

gen camps=1


* Bases per district and opening year / closing year.
* bases_closed is not carried into the collapse below.
bysort districtcode Opened: egen bases_opened=total(camps), missing
bysort districtcode Closed: egen bases_closed=total(camps), missing

* The opening year becomes the panel year of this file.
rename Opened year

collapse (mean) bases_opened (last) district, by (districtcode year)
save "processed/camps_opened.dta", replace


*Closed as year

* Output: processed/camps_closed.dta with one row per district and closing
* year, holding the number of bases closed in that district-year (bases_closed).
import excel "Military Bases and Camps.xlsx", sheet("Military Bases and Camps") firstrow clear
rename District PossibleSpellings
drop if missing(PossibleSpellings)
replace PossibleSpellings=lower(PossibleSpellings)

* Harmonise district spellings with the keys used in temp.dta.
replace PossibleSpellings="dihrawud" if PossibleSpellings=="deh rawod"
replace PossibleSpellings="khash rod" if PossibleSpellings=="delaram" /*https://en.wikipedia.org/wiki/Delaram*/
replace PossibleSpellings="garmser" if PossibleSpellings=="garmsir"
replace PossibleSpellings="khost(matun)" if PossibleSpellings=="khost (matun)"
replace PossibleSpellings="mando zayi" if PossibleSpellings=="mandozayi"
replace PossibleSpellings="mazari sharif" if PossibleSpellings=="mazar-e sharif" | PossibleSpellings=="mazar-e-sharif"
replace PossibleSpellings="nahri sarraj" if PossibleSpellings=="nahri saraj" | PossibleSpellings=="nahri saraj "
replace PossibleSpellings="tirin kot" if PossibleSpellings=="tarin kot"

* Keep only bases whose district is found in the lookup.
merge m:1 PossibleSpellings using "processed/temp.dta" 
keep if _merge==3
drop _merge PossibleSpellings

* Missing closing year is set to 2015.
destring Closed, replace
replace Closed=2015 if Closed==.

* "Before/in YYYY" is coded as YYYY; a missing opening year is set to 2001.
replace Opened="2008" if Opened=="Before/in 2008"
replace Opened="2006" if Opened=="Before/in 2006"

destring Opened, replace
replace Opened=2001 if Opened==.

gen camps=1


* Bases per district and opening year / closing year.
* bases_opened is not carried into the collapse below.
bysort districtcode Opened: egen bases_opened=total(camps), missing
bysort districtcode Closed: egen bases_closed=total(camps), missing

* The closing year becomes the panel year of this file.
rename Closed year

collapse (mean) bases_closed (last) district, by (districtcode year)
save "processed/camps_closed.dta", replace



/*******************************************************************************
*                     Province-level dataset                                   *
*******************************************************************************/

* Output: processed/province.dta, the list of distinct province names found in
* districtnames.dta (one row per value of province).
* "clear" makes the load independent of whatever is currently in memory.
use "processed/districtnames.dta", clear
bysort province: keep if _n == 1
keep province
save "processed/province.dta", replace
clear


/*******************************************************************************
*                                  Deflators                                   *
*******************************************************************************/

***US, 2010 base year
**WDI
* US GDP deflator -> processed/GDPdeflUS.dta
import excel "GDPdeflator.xlsx", sheet("Data") firstrow clear
rename GDPdeflatorbaseyearvariesb GDPdeflUS
rename Time year
rename CountryName country
rename CountryCode code
drop TimeCode Inflationconsumerpricesannu
* ".." marks missing values in the source sheet; recode before converting to numeric.
* The full variable name is used so this also runs with variable abbreviation off.
replace GDPdeflUS="." if GDPdeflUS==".."
destring GDPdeflUS, replace
drop if missing(country)
drop if missing(code)

keep if code=="USA"

save "processed/GDPdeflUS.dta", replace


***Euro zone, base year 2010 
import excel "EU_deflator.xlsx", firstrow clear
rename deflator GDPdeflEU

save "processed/GDPdeflEU.dta", replace


*OECD EUROZONE, EUROPEAN Union (28)
* CPI index (column Index2010100) -> processed/CPI_EU.dta
import excel "oecd_CPI.xlsx", sheet("OECD.Stat export") cellrange(A6:F47) firstrow clear
keep Country Time Index2010100
drop if missing(Time)
rename Time year
destring year, replace


rename Index2010100 CPI_EU
* Drop the euro-area series; the remaining rows are the ones merged on year later on.
drop if Country=="Euro area (19 countries)"
save "processed/CPI_EU.dta", replace


/*******************************************************************************
*                                 Drug Prices                                  *
*******************************************************************************/

***get CPI 
* Country-level CPI -> processed/CPI.dta; US series also saved as processed/CPI_USD.dta
import excel "imf_CPI.xlsx", sheet("Data") firstrow clear 
drop if CountryCode==""
rename CountryCode ccode
rename Consumerpriceindex2010100 CPI
* ".." marks missing values in the source sheet.
replace CPI="." if CPI==".."
destring CPI, replace
rename Time year

* Side output: the US series on its own.
preserve
	keep if CountryName=="United States"
	gen CPI_USD=CPI
	drop CPI TimeCode CountryName ccode
	save "processed/CPI_USD.dta", replace
restore

drop TimeCode CountryName


merge m:1 year using "processed/CPI_EU.dta"
drop _merge
 
* For the listed country codes the national CPI is replaced by the EU-wide CPI.
* NOTE(review): Romania is matched as "ROM"; if the source file uses the ISO
* code "ROU" this condition never applies to Romania - confirm against imf_CPI.xlsx.
replace CPI=CPI_EU if ccode=="BGR" | ccode=="HRV" | ccode=="CZE" | ccode=="DNK" | ccode=="HUN" |ccode=="POL" | ccode=="ROM" | ccode=="SWE" | ccode=="GBR"

drop CPI_EU Country

save "processed/CPI.dta", replace

*Countrycode data
* Stored as a .dta file so that it can be used as the using file of a merge.
import delimited using "countrycode.csv", clear varnames(1)
save "processed/countrycode.dta", replace


***get exchange rate: Total, National currency units/US dollar, 2000 – 2016
*download: https://data.oecd.org/conversion/exchange-rates.htm, 05.05.2017
* Outputs: processed/exchangerateEU28.dta (year, exchangerate28) and
*          processed/exchangerateEU19.dta (year, exchangerate19).
import delimited "exchangerates.csv", delimiter(`",""') clear
keep location time v11
* Strip the commas left in the value column, then convert it to numeric below.
replace v11=subinstr(v11, ",", "",.)
rename v11 exchangerate
* The location codes still carry a trailing comma from the import.
keep if location=="EU28," | location=="EA19,"
destring exchangerate, replace
gen exchangerate28=exchangerate if location=="EU28,"
gen exchangerate19=exchangerate if location=="EA19,"
drop exchangerate
rename time year

* EU28 series
preserve 
	keep if location=="EU28,"
	drop location exchangerate19
	save "processed/exchangerateEU28.dta", replace
restore

* EA19 series
keep if location=="EA19,"
drop location exchangerate28
save "processed/exchangerateEU19.dta", replace


***some drugs
* For each drug: read the country-by-year price sheet, reshape it to long form,
* deflate with the country CPI and save the yearly cross-country mean of the
* deflated (<drug>n) and current (<drug>_cur) price as processed/<drug>new.dta.
foreach code in cocaineprice amphetamineprice brownheroinprice LSDprice Ecstasyprice  {
	import excel "`code'.xlsx", cellrange(A4:V39) firstrow clear
	drop if missing(Country)
	drop if strpos(Country,"(1) Minimum= minimum price recorded") 

	* Columns C-U are renamed to an underscore plus their variable label, which
	* gives reshape the stub "_"; the labels are presumably the years - confirm.
	foreach v of varlist C-U {
		local x : variable label `v'
		rename `v' _`x'
	}

	drop V Study
	reshape long _,i(Country) j(year)
	rename _ `code'

	* Keep the part of the country entry before the first "-" as merge key rec.
	rename Country rec
	split rec, parse(-)
	drop rec
	rename rec1 rec
	merge m:1 rec using "processed/countrycode.dta"
	drop if _merge==2
	drop _merge

	* One price per country code and year, then attach the CPI.
	collapse (mean) `code', by(ccode year)
	merge m:1 ccode year using "processed/CPI.dta"
	drop if _merge==2
	drop _merge
	rename `code' `code'_cur
	gen `code'n=`code'_cur/CPI*100

	* Average over countries. The path uses a forward slash because a
	* backslash directly before a macro reference would suppress its expansion.
	collapse (mean) `code'n `code'_cur, by(year)
	save "processed/`code'new.dta", replace
}

***more drugs

* Same processing as the previous loop for three further drugs; the only
* difference is the shorter cell range (A4:V36) of these sheets.
foreach code in crackprice whiteheroinprice Methamphetamineprice {
	import excel "`code'.xlsx", cellrange(A4:V36) firstrow clear
	drop if missing(Country)
	drop if strpos(Country,"(1) Minimum= minimum price recorded") 

	* Columns C-U are renamed to an underscore plus their variable label, which
	* gives reshape the stub "_"; the labels are presumably the years - confirm.
	foreach v of varlist C-U {
		local x : variable label `v'
		rename `v' _`x'
	}
	drop V Study
	reshape long _,i(Country) j(year)
	rename _ `code'

	* Keep the part of the country entry before the first "-" as merge key rec.
	rename Country rec
	split rec, parse(-)
	drop rec
	rename rec1 rec
	merge m:1 rec using "processed/countrycode"
	drop if _merge==2
	drop _merge

	* One price per country code and year, then attach the CPI.
	collapse (mean) `code', by(ccode year)
	merge m:1 ccode year using "processed/CPI.dta"
	drop if _merge==2
	drop _merge
	rename `code' `code'_cur
	gen `code'n=`code'_cur/CPI*100

	* Average over countries. The path uses a forward slash because a
	* backslash directly before a macro reference would suppress its expansion.
	collapse (mean) `code'n `code'_cur, by(year)
	save "processed/`code'new.dta", replace
}


***UNODC Fresh opium farm-gate prices at harvest time (weighted by regional production) in Afghanistan (US$/kg)
* Output: processed/opiumpriceAFG_yearly.dta with the yearly price in current
* USD, deflated USD, current EU currency units and deflated EU currency units.
import excel "opiumpriceAFG_yearly.xlsx", sheet("data") firstrow clear
gen ccode="AFG"

* Attach both price indices by year; years that exist only in the index files are dropped.
merge m:1 year using "processed/CPI_EU.dta"
drop if _merge==2
drop _merge
merge m:1 year using "processed/CPI_USD.dta"
drop if _merge==2
drop _merge

* Deflate the USD price with the US CPI.
rename opiumpriceAFG opiumpriceAFG_curUSD
gen opiumpriceAFG_USD=opiumpriceAFG_curUSD/CPI_USD*100
label var opiumpriceAFG_USD "Fresh opium farm-gate prices at harvest time (weighted by regional production) in Afghanistan (US$/kg) in constant 2010 USD"

merge m:1 year using "processed/exchangerateEU28.dta"
drop if _merge==2
drop _merge

* Convert with the EU28 exchange rate, then deflate with the EU CPI.
gen opiumpriceAFG_cur=opiumpriceAFG_curUSD*exchangerate28
label var opiumpriceAFG_cur "Fresh opium farm-gate prices at harvest time in Afghanistan in current EU/kg  (weighted by regional production) "
gen opiumpriceAFG=opiumpriceAFG_cur/CPI_EU*100
label var opiumpriceAFG "Fresh opium farm-gate prices at harvest time in Afghanistan in const.2010 EU/kg (weighted by regional production)"

drop CPI* ccode Country exchangerate28

save "processed/opiumpriceAFG_yearly.dta", replace
 

***Local Drug prices, monthly, UNODC drug reports _ INCLUDING FARM-GATE PRICES AND TRADER PRICES
*extracted from Afghanistan Opium Price Monitoring
* Output: processed/localopiumpricesbyprovince.dta with one row per province
* and year: trader and farm-gate dry opium prices in current and deflated
* USD and EU currency units.
import excel "opium_prices_farmer_and_trader.xlsx", sheet("table1") firstrow clear
drop H Source
rename Province province 
replace province=lower(province)

* Keep only provinces that appear in the province list.
merge m:1 province using "processed/province.dta"
drop if _merge==1 | _merge==2
drop _merge
rename Year year
destring AverageDryOpiumPricebyTrade YearlyAverageDryOpiumPrice FarmerDryopiumPriceinUSDkg, replace
replace FarmerDryopiumPriceinUSDkg=YearlyAverageDryOpiumPrice if FarmerDryopiumPriceinUSDkg==. /*YearlyAverageDryOpiumPrice is on farm-gate level*/

* Province-year means of the trader price and the farm-gate price.
bysort province year: egen dryopium_price=mean(AverageDryOpiumPricebyTrade)
bysort province year: egen dryopium_fgprice=mean(FarmerDryopiumPriceinUSDkg)

collapse (mean) AverageDryOpiumPricebyTrade  dryopium_price dryopium_fgprice, by(province year)
drop AverageDryOpiumPricebyTrade
rename dryopium_price dryopium_price_curUSD
label var dryopium_price_curUSD "Average Dry Opium Price by Trader in current USD/kg"
rename dryopium_fgprice dryopium_fgprice_curUSD
label var dryopium_fgprice_curUSD "Average Farmer Dry opium Price in current USD/kg"

* Deflate the USD prices with the US CPI.
merge m:1 year using "processed/CPI_USD.dta"
drop if _merge==2
drop _merge

gen dryopium_priceUSD=dryopium_price_curUSD/CPI_USD*100
label var dryopium_priceUSD "Average Dry Opium Price by Trader in constant 2010 USD/kg"
gen dryopium_fgUSD=dryopium_fgprice_curUSD/CPI_USD*100
label var dryopium_fgUSD "Average Farmer Dry opium Price in constant 2010 USD/kg"

drop CPI_USD

* Convert with the EU28 exchange rate and deflate with the EU CPI.
merge m:1 year using "processed/CPI_EU.dta"
drop if _merge==2
drop _merge

merge m:1 year using "processed/exchangerateEU28.dta"
drop if _merge==2
drop _merge

gen dryopium_price_cur=dryopium_price_curUSD*exchangerate28
label var dryopium_price_cur "Average Dry Opium Price by Trader in current EU/kg"

gen dryopium_fgprice_cur=dryopium_fgprice_curUSD*exchangerate28 
label var dryopium_fgprice_cur "Average Farmer Dry opium Price in current EU/kg"

gen dryopium_price=dryopium_price_cur/CPI_EU*100
label var dryopium_price "Average Dry Opium Price by Trader in constant 2010 EU/kg"

gen dryopium_fgprice=dryopium_fgprice_cur/CPI_EU*100
label var dryopium_fgprice "Average Farmer Dry opium in constant 2010 EU/kg"


drop Country CPI_EU exchangerate28

save "processed/localopiumpricesbyprovince.dta", replace


****Legal Opioid Data

* Output: processed/opioidprescription.dta (year, prescription).
import excel "Opioid Prescriptions.xlsx", sheet ("Tabelle1") firstrow clear
rename Year year
rename NumberOfOpioidPrescriptionsU prescription

save "processed/opioidprescription.dta", replace
clear



/*******************************************************************************
*                                 Rainfall Data                                *
*******************************************************************************/

*********These are the gids for each district in Afghanistan
* Output: processed/Outputprio.dta, a grid-cell (gid) to district crosswalk
* repeated for every year 1997-2013.
import excel "Outputprio.xls", sheet("Outputprio") firstrow clear
merge m:1 OBJECTID using "processed/districtnames.dta", nogen

keep gid province districtcode district 

* One row per gid-district combination.
egen group=group(gid district)
bysort group: gen n=_n
keep if n==1

* 17 copies of each combination, numbered 1997, 1998, ..., 2013.
expand 17
bysort group: gen year=1997+_n-1
drop n group 
save "processed/Outputprio.dta", replace


* Output: processed/PRIO-GRIDrainfall.dta with one row per district and year,
* holding the mean of prec_gpcc over the grid cells assigned to the district.
import delimited "PRIO-GRID_1997-2014.csv", clear
drop if year==2014

* Make gid-year a unique key (tsset requires this).
duplicates drop gid year, force
tsset gid year

* Match every cell-year to the districts of that cell in the same year.
* The crosswalk carries its own year variable, so the key has to be gid AND
* year. The previous "merge m:m gid" paired rows by their position within
* gid: once the cell's rows were used up, its last row was repeated for all
* remaining crosswalk rows, so further districts sharing a cell did not
* receive the rainfall of the matching year.
* NOTE(review): this changes the generated rainfall data for such districts -
* re-check downstream results.
merge 1:m gid year using "processed/Outputprio.dta"
tab _merge
keep if _merge == 3
drop _merge

collapse (mean) prec_gpcc (last) gid province district, by(districtcode year)

save "processed/PRIO-GRIDrainfall.dta", replace



/*******************************************************************************
*                                   NRVA Data                                  *
*******************************************************************************/

/*

*-------------------2003 Data

*Take Shura Data to first match geocodes to new district shapefile that is the correct one for the 2005 and subsequent surveys

import delimited "stata\rawdata\NRVA data\NRVA 2003\nrva 2003 geocodes districts.csv", delimiter("") clear
drop field* x y

keep provincen provincec aimsdistr nrvadisti aimsdis_1 nrvadistr villagena shuraid latitud longit objectid dist_34_na prov_34_na
drop if missing(shuraid)


rename objectid OBJECTID

unique OBJECTID
tab OBJECTID


merge m:1 OBJECTID using "stata\processed\districtnames.dta"
drop if _merge==2
drop _merge

tab districtcode
unique districtcode

drop OBJECTID prov_34_na dist_34_na PossibleSpellings

save "stata\processed\district2003merge.dta", replace 

****check for how many villages the merge to the shapefile worked
import excel "stata\rawdata\NRVA data\NRVA 2003\NRVA 2003 Male Female Shura.xls", sheet("Male Shura Section B-F") firstrow clear

rename ShuraID shuraid

merge 1:1 shuraid using  "stata\processed\district2003merge.dta"
gen nomatchshp=0
replace nomatchshp=1 if _merge==1
drop _merge

rename NRVADistictName PossibleSpellings
replace PossibleSpellings=lower(PossibleSpellings)
rename ProvinceName Province_Name	
replace Province_Name=lower(Province_Name)

replace PossibleSpellings="arghandab (kandahar)" if PossibleSpellings=="arghandab" & Province_Name=="kandahar"
replace PossibleSpellings="arghandab (zabul)" if PossibleSpellings=="arghandab" & Province_Name=="zabul"
replace PossibleSpellings="baharak (badakhshan)" if PossibleSpellings=="baharak" & Province_Name=="badakhshan"
replace PossibleSpellings="baharak (takhar)" if PossibleSpellings=="baharak" & Province_Name=="takhar"
replace PossibleSpellings="dawlatabad (balkh)" if PossibleSpellings=="dawlatabad" & Province_Name=="balkh"
replace PossibleSpellings="dawlatabad (faryab)" if PossibleSpellings=="dawlatabad" & Province_Name=="faryab"
replace PossibleSpellings="dawlatabad (balkh)" if PossibleSpellings=="dawlat abad" & Province_Name=="balkh"
replace PossibleSpellings="dawlatabad (faryab)" if PossibleSpellings=="dawlat abad" & Province_Name=="faryab"
replace PossibleSpellings="fayzabad (badakhshan)" if PossibleSpellings=="fayzabad" & Province_Name=="badakhshan"
replace PossibleSpellings="fayzabad (jawzjan)" if PossibleSpellings=="fayzabad" & Province_Name=="jawzjan"
replace PossibleSpellings="fayzabad (badakhshan)" if PossibleSpellings=="fayzabad" & Province_Name=="badakhshan"
replace PossibleSpellings="fayzabad (jawzjan)" if PossibleSpellings=="fayzabad" & Province_Name=="jawzjan"
replace PossibleSpellings="jani khel (paktya)" if PossibleSpellings=="jani khel" & Province_Name=="paktya"
replace PossibleSpellings="jani khel (paktika)" if PossibleSpellings=="jani khel" & Province_Name=="paktika"	
replace PossibleSpellings="kohistan (badakhshan)" if PossibleSpellings=="kohistan" & Province_Name=="badakhshan"
replace PossibleSpellings="kohistan (faryab)" if PossibleSpellings=="kohistan" & Province_Name=="faryab"
replace PossibleSpellings="muqur (ghazni)" if PossibleSpellings=="muqur" & Province_Name=="ghazni"
replace PossibleSpellings="muqur (badghis)" if PossibleSpellings=="muqur" & Province_Name=="badghis"		
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qarabagh" & Province_Name=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qara bagh" & Province_Name=="ghazni"
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qara bagh" & Province_Name=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qarrabagh" & Province_Name=="ghazni"
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qarrabagh" & Province_Name=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qarabagh" & Province_Name=="ghazni"
replace PossibleSpellings="reg (hilmand)" if PossibleSpellings=="reg" & Province_Name=="hilmand"
replace PossibleSpellings="reg (kandahar)" if PossibleSpellings=="reg" & Province_Name=="kandahar"
replace PossibleSpellings="ishkamish (takhar)" if PossibleSpellings=="ishkamish" & Province_Name=="takhar"
replace PossibleSpellings="ishkashim (badakhshan)" if PossibleSpellings=="ishkashim" & Province_Name=="badakhshan"
replace PossibleSpellings="yaftali sufla" if PossibleSpellings=="yafal" & Province_Name=="badakhshan"
replace PossibleSpellings="shuhada" if PossibleSpellings=="shohada" & Province_Name=="badakhshan"
replace PossibleSpellings="shahri buzurg" if PossibleSpellings=="shahri buzur" & Province_Name=="badakhshan" 
replace PossibleSpellings="qala-i- naw" if PossibleSpellings=="qala-e-now" & Province_Name=="badghis"
replace PossibleSpellings="ab kamari" if PossibleSpellings=="sangiatash" & Province_Name=="badghis"
replace PossibleSpellings="nahrin" if PossibleSpellings=="julga" & Province_Name=="baghlan"
replace PossibleSpellings="tala wa barfak" if PossibleSpellings=="tala wa barf" & Province_Name=="baghlan"
replace PossibleSpellings="shaygal wa shiltan" if PossibleSpellings=="shigal" & Province_Name=="kunar"
replace PossibleSpellings="sirkanay" if PossibleSpellings=="sarkany" & Province_Name=="kunar"
replace PossibleSpellings="bar kunar" if PossibleSpellings=="asmar" & Province_Name=="kunar"
replace PossibleSpellings="wuza zadran" if PossibleSpellings=="jadran" & Province_Name=="paktya"
replace PossibleSpellings="shahidi hassas" if PossibleSpellings=="shahidi hass" & Province_Name=="uruzgan"

		
rename Province_Name ProvinceNameNRVA

rename district district_shp
rename districtcode districtcode_shp

merge m:1 PossibleSpellings using "stata\processed\temp.dta" 
drop if _merge==2 
drop _merge

replace district_shp=district if district_shp==""
replace districtcode_shp=districtcode if districtcode_shp==.

drop district districtcode
rename district_shp district
rename districtcode_shp districtcode

rename PossibleSpellings DistictNameNRVA
rename ProvinceCode ProvinceCodeNRVA 
keep ProvinceNameNRVA ProvinceCodeNRVA AIMSDistrictName DistictNameNRVA AIMSDistrictCode NRVADistrictCode VillageName shuraid VillageCode Kuchi_ID InterviewDate ///
nomatchshp district districtcode province longit latitud

label var nomatchshp "Indicator when match to shapefile via longititde and latitude didn't work - where I use NRVA district information"

save "stata\processed\district2003merge_nomatchshp.dta", replace 

unique districtcode
unique district


*Load each sheet of the NRVA 2003 household workbook, attach the district
*identifiers through shuraid and save one processed file per sheet.
*The three parallel lists give, in sheet order: the last column of the imported
*cell range, the suffix of the weight variables moved to the front, and the
*stub of the output file name.
local nrva03_lastcol  BT BE BD DY BF HN
local nrva03_wsuffix  AS AS AG AG AG DD
local nrva03_outstub  shocks assets land crops livestock dietarydiversity

local s = 0
foreach sheetname in "Shocks" "Assets" "Land" "Crops" "Livestock" "Dietary Diversity" {
	local ++s
	local lastcol : word `s' of `nrva03_lastcol'
	local wsuffix : word `s' of `nrva03_wsuffix'
	local outstub : word `s' of `nrva03_outstub'

	import excel "stata\rawdata\NRVA data\NRVA 2003\NRVA 2003 Household data.xls", sheet("`sheetname'") cellrange(A2:`lastcol'11759) firstrow clear

	* keep every household row, drop lookup rows without a household
	rename Shura_Code shuraid
	merge m:1 shuraid using "stata\processed\district2003merge_nomatchshp.dta", keep(master match) nogenerate

	* location columns of the raw sheet are superseded by the merged identifiers
	drop Province_Name Province_Code AIMS_District_Code NRVA_District_Code AIMS_District_Name NRVA_District_Name Village_Name Alternative_Village_Name

	order HH_Code HH_ID HH_Weight`wsuffix' Ind_Weight`wsuffix' ProvinceNameNRVA- nomatchshp

	save "stata\processed\NRVA2003_`outstub'.dta", replace
}


*Section B4 - language
*Household-member file: derive the mean age and the modal native language per
*household, then reduce the data to one row per household.
import excel "stata\rawdata\NRVA data\NRVA 2003\NRVA 2003 HH _B4.xlsx", sheet("NRVA 2003 HH Member data") firstrow clear
rename SHURA_ID shuraid
merge m:1 shuraid using "stata\processed\district2003merge_nomatchshp.dta", keep(master match) nogenerate

by HH_CODE, sort: egen HHage=mean(AGE)
by HH_CODE, sort: egen HHB4_Native=mode(B4_NATIVE), missing

*Drop member-level and redundant location columns before collapsing
drop UN_REGION-ALTERNATIV VILL_CODE-WG_ID QAIRE_ID  IND_CODE IND_ID ProvinceNameNRVA-longit
collapse (mean) HHage HHB4_Native (last) nomatchshp district districtcode province shuraid , by(HH_CODE)

*Same household key name as in the sheet-level files
rename HH_CODE HH_Code
save "stata\processed\NRVA2003_HH_B4.dta", replace


****Combine all separate files of 2003
*Start from the shocks file and add the other sheet-level files household by
*household; unmatched households from either side are kept at this stage.
use "stata\processed\NRVA2003_shocks.dta", clear

foreach part in assets land crops livestock dietarydiversity {
	merge 1:1 HH_Code using "stata\processed\NRVA2003_`part'.dta", nogenerate
}

*Only households that also appear in the B4 language file are retained
merge 1:1 HH_Code using "stata\processed\NRVA2003_HH_B4.dta", keep(match) nogenerate

*not clear which weight is the correct one - very highly correlated - so take the mean
egen hhweight2003=rowmean(HH_WeightAS HH_WeightAG HH_WeightDD)

*Modal native language of the district and number of households per district
bysort districtcode: egen dis_native=mode(HHB4_Native), missing
bysort districtcode: gen dis_hh=_N

/*
Codes of HHB4_Native:
1.  Dari 
2.  Pashto
3.  Uzbeki
4.  Turkmani 
5.  Balochi
6.  Pashaie
7.  Nooristani 
8.  Sharrai
9.  Other
*/ 

*Per language code: share of the district households with that native language
*and a dummy marking whether the language occurs in the district at all
forvalues lang=1/9 {
	bysort districtcode: egen counthh`lang'=count(HHB4_Native) if HHB4_Native==`lang'
	bysort districtcode: egen hh`lang'=max(counthh`lang')
	gen share_native`lang'=hh`lang'/dis_hh
	replace share_native`lang'=0 if share_native`lang'==.
	gen dum_native`lang'=(share_native`lang'>0)
	label var share_native`lang' "Share of households with native language `lang' per district"
}

*District-level Pashtun indicators (language code 2); the creation order matters
*because the keep and collapse below use the range anypashtuns_nrva- pashtunshare_nrva
gen anypashtuns_nrva=(share_native2>0)
gen onlypashtuns_nrva=(share_native2==1)
gen onlypashtuns_nrva2=(share_native2>0.9)
gen majpashtuns_nrva=(dis_native==2)
egen numberethnic_nrva=rowtotal(dum_native*)
gen mixed_nrva=(numberethnic_nrva>1)
gen pashtunshare_nrva=share_native2

*Variable labels, stored once and applied twice because collapse discards them.
*numberethnic_nrva is a count of languages; its previous label wrongly repeated
*the wording of the mixed_nrva dummy.
local lab_anypashtuns_nrva   "If share of pasthun natives is > 0, using 2003 NRVA survey"
local lab_onlypashtuns_nrva  "If share of pasthun natives is =1, using 2003 NRVA survey"
local lab_onlypashtuns_nrva2 "If share of pasthun natives is >0.9, using 2003 NRVA survey"
local lab_majpashtuns_nrva   "If pashtun is the mode within a district (most common native language), using 2003 NRVA survey"
local lab_numberethnic_nrva  "Number of different native languages present in district, using 2003 NRVA survey"
local lab_mixed_nrva         "Dummy=1 if at least two different native languages are present in district, using 2003 NRVA survey"
local lab_pashtunshare_nrva  "Share of households with pasthun as native language"
local nrva03_langvars anypashtuns_nrva onlypashtuns_nrva onlypashtuns_nrva2 majpashtuns_nrva numberethnic_nrva mixed_nrva pashtunshare_nrva

foreach v of local nrva03_langvars {
	label var `v' "`lab_`v''"
}

*Remove the helper variables before saving the household-level file
drop hh1 hh2 hh3 hh4 hh5 hh6 hh7 hh8 hh9 counthh* dis_hh dum_native* share_native* dis_native

save "stata\processed\NRVAData_2003.dta", replace	


*District-level language file: the indicators are constant within a district,
*so the mean simply carries them over to one row per district
keep districtcode district anypashtuns_nrva- pashtunshare_nrva

collapse (mean) anypashtuns_nrva- pashtunshare_nrva, by(districtcode)
foreach v of local nrva03_langvars {
	label var `v' "`lab_`v''"
}
save "stata\processed\NRVA2003_languages.dta", replace 

tab districtcode, missing




*************************************2005 Data*************************************
*Each NRVA 2005 Excel workbook is imported (first row = variable names) and
*saved unchanged as a Stata file under stata\processed; the merge follows below.

*** Area Name
import excel "stata\rawdata\NRVA data\NRVA 2005\Area Name New.xlsx", sheet("Area_Name_New") firstrow clear
save "stata\processed\Area Name New_2005.dta", replace


***District Info
import excel "stata\rawdata\NRVA data\NRVA 2005\District_CSO.xlsx", sheet("District_CSO") firstrow		clear
save "stata\processed\District_CSO_2005.dta", replace


***Province
import excel "stata\rawdata\NRVA data\NRVA 2005\Province.xlsx", sheet("Province") firstrow	clear
save "stata\processed\Province_2005.dta", replace


***District Prices (raw opium prices)
import excel "stata\rawdata\NRVA data\NRVA 2005\District_Price.xlsx", sheet("District_Price") firstrow	clear
save "stata\processed\Districtprice_2005.dta", replace


*** Household Main
import excel "stata\rawdata\NRVA data\NRVA 2005\Household_Main.xlsx", sheet("Household_Main") firstrow	clear
save "stata\processed\Household_Main_2005.dta", replace


*** Section 2-6 Male (note: the output file name starts with HHH_)
import excel "stata\rawdata\NRVA data\NRVA 2005\HH_Male_Section_2_3_4_5_6.xlsx", sheet("HH_Male_Section_2_3_4_5_6") firstrow	clear	
save "stata\processed\HHH_Male_Section_2_6_2005.dta", replace


*** Section 4-9 Male Shura
import excel "stata\rawdata\NRVA data\NRVA 2005\Male_Shura_Section_4_5_6_7_8_9.xlsx", sheet("Male_Shura_Section_4_5_6_7_8_9") firstrow clear
save "stata\processed\HH_Shura_Section_4_9_2005.dta", replace


*** Section 7,8
import excel "stata\rawdata\NRVA data\NRVA 2005\HH_Male_Section_7_8.xlsx", sheet("HH_Male_Section_7_8") firstrow	clear
save "stata\processed\HH_Male_Section_7_8_2005.dta", replace


*** Section 9-11	
import excel "stata\rawdata\NRVA data\NRVA 2005\HH_Male_Section_9_10_11.xlsx", sheet("HH_Male_Section_9_10_11") firstrow	clear
save "stata\processed\HH_Male_Section_9_10_11_2005.dta", replace


*** Section 12,13,14
import excel "stata\rawdata\NRVA data\NRVA 2005\HH_Male_Section_12_13_14.xlsx", sheet("HH_Male_Section_12_13_14") firstrow	clear
save "stata\processed\HH_Male_Section_12_13_14_2005.dta", replace


***Agriculture (includes HH-ID, cluster, district etc.); the file name is spelled Agricuture in the raw data
import excel "stata\rawdata\NRVA data\NRVA 2005\Agricuture.xlsx", sheet("Agricuture") firstrow	clear
save "stata\processed\Agricuture_2005.dta", replace


*** Section 15 (Food Consumption)
*HH_No is converted to numeric in both parts; Relationship_to_Head_HH is dropped from part 1 only
import excel "stata\rawdata\NRVA data\NRVA 2005\HH_Female_Section_15_Part1.xlsx", sheet("HH_Female_Section_15_Part1") firstrow	clear
destring HH_No, replace
drop Relationship_to_Head_HH
save "stata\processed\Section15_1_2005.dta", replace


import excel "stata\rawdata\NRVA data\NRVA 2005\HH_Female_Section_15_Part2.xlsx", sheet("HH_Female_Section_15_Part2") firstrow	clear
destring HH_No, replace
save "stata\processed\Section15_2_2005.dta", replace


*************Merge*************
*Build the 2005 household file: Household_Main plus cluster information, then
*the questionnaire sections matched on HH_ID. Unless noted otherwise only
*matched households are kept (original remark on each of these merges:
*lose 1 observation from master data: 210110010104501).

use "stata\processed\Household_Main_2005.dta", clear
merge m:1 Cluster_Code using "stata\processed\Area Name New_2005.dta", keep(match) nogenerate
order HH_ID Household_Code Cluster_Code HH_No RuralUrbanKuchi DistrictCode Cluster_No ProvEnglish ///
DisEnglish Area_Name Kuchi_Code ProvinceCode ProvDari Village_Dari DisDari VillageCityCode SubVillageNahiaCode

*Sections 12-14 bring their own HH_No (same as the master one but with some
*missing values): protect the master copy, discard the incoming one
rename HH_No HH_No1
merge 1:1 HH_ID using "stata\processed\HH_Male_Section_12_13_14_2005.dta", keep(match) nogenerate
drop HH_No
rename HH_No1 HH_No

*Agriculture: all rows are kept, matched or not
merge 1:1 HH_ID using "stata\processed\Agricuture_2005.dta", nogenerate

merge 1:1 HH_ID using "stata\processed\HHH_Male_Section_2_6_2005.dta", keep(match) nogenerate

*Sections 7-8: same HH_No handling as for sections 12-14
rename HH_No HH_No1
merge 1:1 HH_ID using "stata\processed\HH_Male_Section_7_8_2005.dta", keep(match) nogenerate
drop HH_No
rename HH_No1 HH_No

merge 1:1 HH_ID using "stata\processed\HH_Male_Section_9_10_11_2005.dta", keep(match) nogenerate

merge 1:1 HH_ID using "stata\processed\Section15_1_2005.dta", keep(match) nogenerate

merge 1:1 HH_ID using "stata\processed\Section15_2_2005.dta", keep(match) nogenerate


drop District_Code

*The English district name becomes the merge key PossibleSpellings; the
*survey district code is kept under the name District_Code
rename DisEnglish PossibleSpellings
rename DistrictCode District_Code

***Merge with Merge_Districtnames

*Lower-case the names and replace survey-specific spellings
replace PossibleSpellings=lower(PossibleSpellings)
replace PossibleSpellings="zarghun shahr" if PossibleSpellings=="khir kot"
replace PossibleSpellings="unaba" if PossibleSpellings=="panjshir2" 
replace PossibleSpellings="khinj( hisa-i-awal )" if PossibleSpellings=="panjshir 1"
replace PossibleSpellings="shahjoy" if PossibleSpellings=="seyouray"
replace PossibleSpellings="qarabagh" if PossibleSpellings=="qarabogh"
replace PossibleSpellings="ruyi du ab" if PossibleSpellings=="dara-i-sufi ulya"
rename ProvEnglish Province_Name	
replace Province_Name=lower(Province_Name)

*District names that occur in two provinces get the province appended in
*parentheses. The two lists give, in the order of the names in the loop, the
*first and the second province of each name.
local homonym_provA kandahar badakhshan badakhshan paktya badakhshan ghazni hilmand
local homonym_provB zabul takhar jawzjan paktika faryab badghis kandahar
local k = 0
foreach name in "arghandab" "baharak" "fayzabad" "jani khel" "kohistan" "muqur" "reg" {
	local ++k
	local provA : word `k' of `homonym_provA'
	local provB : word `k' of `homonym_provB'
	foreach prov in `provA' `provB' {
		replace PossibleSpellings="`name' (`prov')" if PossibleSpellings=="`name'" & Province_Name=="`prov'"
	}
}

*Homonyms that additionally come in several spellings
foreach prov in balkh faryab {
	replace PossibleSpellings="dawlatabad (`prov')" if inlist(PossibleSpellings,"dawlatabad","dawlat abad") & Province_Name=="`prov'"
}
foreach prov in kabul ghazni {
	replace PossibleSpellings="qarabagh (`prov')" if inlist(PossibleSpellings,"qarabagh","qara bagh","qarrabagh") & Province_Name=="`prov'"
}

*Two similar-sounding districts, each tagged with its province
replace PossibleSpellings="ishkamish (takhar)" if PossibleSpellings=="ishkamish" & Province_Name=="takhar"
replace PossibleSpellings="ishkashim (badakhshan)" if PossibleSpellings=="ishkashim" & Province_Name=="badakhshan"

rename Province_Name ProvinceNameNRVA

*Only households whose district name is found in temp.dta are kept
merge m:1 PossibleSpellings using "stata\processed\temp.dta", keep(match) nogenerate
rename ProvinceCode ProvinceCodeNRVA

*Keep the survey code under an explicit name, drop the merge key
gen  NRVA_District_Code=District_Code
drop PossibleSpellings District_Code
order HH_ID Household_Code Cluster_Code HH_No province districtcode district

unique districtcode
unique district

gen year=2005
label var year "Year"
rename HH_ID hhid 
order hhid province districtcode district year Household_Code Cluster_Code HH_No

rename HHWeight hhweight2005


save "stata\processed\NRVAData_2005.dta", replace
	
	
*------------------------2007 Data
*Copy the raw 2007 Stata files into the processed folder (loaded and re-saved unchanged)
foreach rawfile in Area_Name CF2 CM3 Core_hh District_Price Female_Section_15_Food_1 Female_Section_15_Food_2 S2A S2B S4 S5A S5C S8 S10 S12 S13 S14 S16 {
	use "stata/rawdata/NRVA data/NRVA 2007/`rawfile'.dta", clear
	save "stata/processed/`rawfile'.dta", replace
}


*Household core file plus area names; unmatched rows from both sides are kept
use "stata\processed\Core_hh.dta", clear
merge m:1 hhid using "stata\processed\Area_Name.dta", nogenerate


*Questionnaire sections, one row per household; rows found only in a section file are dropped
foreach section in S2A S2B S4 S5A S5C S8 S10 S12 S13 S14 Female_Section_15_Food_1 Female_Section_15_Food_2 S16 {
	merge 1:1 hhid using "stata/processed/`section'.dta", keep(master match) nogenerate
}


*CM3 is matched on cid; unmatched data are kept (400 observations from the master file)
merge m:1 cid using "stata\processed\CM3.dta", nogenerate


*Set the existing District_Code aside and use districtc as district code and
*districtn as the district name that serves as merge key
rename District_Code District_Code_PROBLEM 
rename districtc District_Code
rename districtn PossibleSpellings

replace PossibleSpellings=lower(PossibleSpellings)
replace PossibleSpellings="fayzabad" if PossibleSpellings=="faizabad"

rename provincen Province_Name
replace Province_Name=lower(Province_Name)

*District names that occur in two provinces get the province appended in
*parentheses. The two lists give, in the order of the names in the loop, the
*first and the second province of each name.
local homonym_provA kandahar badakhshan badakhshan paktya badakhshan ghazni hilmand
local homonym_provB zabul takhar jawzjan paktika faryab badghis kandahar
local k = 0
foreach name in "arghandab" "baharak" "fayzabad" "jani khel" "kohistan" "muqur" "reg" {
	local ++k
	local provA : word `k' of `homonym_provA'
	local provB : word `k' of `homonym_provB'
	foreach prov in `provA' `provB' {
		replace PossibleSpellings="`name' (`prov')" if PossibleSpellings=="`name'" & Province_Name=="`prov'"
	}
}

*Homonyms that additionally come in several spellings
foreach prov in balkh faryab {
	replace PossibleSpellings="dawlatabad (`prov')" if inlist(PossibleSpellings,"dawlatabad","dawlat abad") & Province_Name=="`prov'"
}
foreach prov in kabul ghazni {
	replace PossibleSpellings="qarabagh (`prov')" if inlist(PossibleSpellings,"qarabagh","qara bagh","qarrabagh") & Province_Name=="`prov'"
}

*Two similar-sounding districts; the 2007 file spells them eshkamesh and eshkashim
replace PossibleSpellings="ishkamish (takhar)" if PossibleSpellings=="eshkamesh" & Province_Name=="takhar"
replace PossibleSpellings="ishkashim (badakhshan)" if PossibleSpellings=="eshkashim" & Province_Name=="badakhshan"

rename Province_Name ProvinceNameNRVA

*Only matched households are kept (Kohistan in Herat - there is no Kohistan in Herat)
merge m:1 PossibleSpellings using "stata\processed\temp.dta", keep(match) nogenerate
gen NRVA_District_Code=District_Code
*we only need spelling of mergefile_districtname, dropped NRVA codes to prevent confusion
drop PossibleSpellings District_Code
rename provincec ProvinceCodeNRVA
order hhid cid stratum hhmebcnt qrt hh_weight mem_weight  province districtcode district

unique districtcode


*CF2 is merged on cid; _merge is kept for the moment because it closes the
*variable range dropped right after
merge m:1 cid using "stata\processed\CF2.dta" 
drop if _merge==2

drop f_2_1_1- _merge

*Survey year taken from the interview date string; 2207, 2508 and 2028 are
*treated as mistyped 2007 and 2008. A 2008 hit overrides a 2007 hit.
gen year="2007" if strpos(dateintrv,"2007")  |  strpos(dateintrv,"2207")
replace year="2008" if strpos(dateintrv,"2008") |  strpos(dateintrv,"2508") |  strpos(dateintrv,"2028")
destring year, replace
*households without a recognisable interview year are dropped (open question in the original: shall we do so)
drop if year==.
label var year "Year"
order hhid province districtcode district year cid stratum 

rename hh_weight hhweight2007


save "stata\processed\NRVAData_2007.dta", replace



*------------------------------------2011 Data
*Copy the raw 2011 Stata files into the processed folder (loaded and re-saved unchanged)
foreach rawfile in Core_household Core_male_shura F_23 M_04 M_06 M_07a M_07c M_09 M_10 M_13 Price {
	use "stata/rawdata/NRVA data/NRVA 2011/`rawfile'.dta", clear
	save "stata/processed/`rawfile'.dta", replace
}

*first create districtcode districtname data
*(one row per District_Code taken from the male shura file). The clear option
*was missing here, so the command failed whenever the data in memory had
*unsaved changes; every other use in this file already specifies it.
use "stata\processed\Core_male_shura.dta", clear
bysort District_Code: keep if _n==1
keep District_Code District_Name
save "stata\processed\DistrictCodeandName.dta", replace


*Household core file (already includes district province codes) plus the
*questionnaire sections; rows found only in a section file are dropped
use "stata\processed\Core_household.dta", clear

foreach section in M_04 M_06 M_07a M_07c M_09 M_10 M_13 F_23 {
	merge 1:1 hhid using "stata/processed/`section'.dta", keep(master match) nogenerate
}

*Attach the district names.
*NOTE(review): district codes that occur only in the shura-based list are kept
*here as rows without a household - confirm that this is intended.
merge m:1  District_Code  using "stata\processed\DistrictCodeandName.dta", nogenerate
order hhid hh_weight ind_weight Resident_Location_Code Province_Code Province_Name District_Code District_Name

rename District_Name PossibleSpellings
replace PossibleSpellings=lower(PossibleSpellings)
replace Province_Name=lower(Province_Name)
replace PossibleSpellings="fayzabad" if PossibleSpellings=="faizabad"
rename Province_Name province_name


*District names that occur in two provinces get the province appended in
*parentheses. The two lists give, in the order of the names in the loop, the
*first and the second province of each name.
local homonym_provA kandahar badakhshan badakhshan paktya badakhshan ghazni hilmand
local homonym_provB zabul takhar jawzjan paktika faryab badghis kandahar
local k = 0
foreach name in "arghandab" "baharak" "fayzabad" "jani khel" "kohistan" "muqur" "reg" {
	local ++k
	local provA : word `k' of `homonym_provA'
	local provB : word `k' of `homonym_provB'
	foreach prov in `provA' `provB' {
		replace PossibleSpellings="`name' (`prov')" if PossibleSpellings=="`name'" & province_name=="`prov'"
	}
}

*Homonyms that additionally come in several spellings
foreach prov in balkh faryab {
	replace PossibleSpellings="dawlatabad (`prov')" if inlist(PossibleSpellings,"dawlatabad","dawlat abad") & province_name=="`prov'"
}
foreach prov in kabul ghazni {
	replace PossibleSpellings="qarabagh (`prov')" if inlist(PossibleSpellings,"qarabagh","qara bagh","qarrabagh") & province_name=="`prov'"
}

*Two similar-sounding districts; source spellings as found in the 2011 file
replace PossibleSpellings="ishkamish (takhar)" if PossibleSpellings=="eshkamesh" & province_name=="takhar"
replace PossibleSpellings="ishkashim (badakhshan)" if PossibleSpellings=="ishkashim" & province_name=="badakhshan"
rename province_name ProvinceNameNRVA

*Unlike 2005 and 2007, households without a match in temp.dta are kept here
merge m:1 PossibleSpellings using "stata\processed\temp.dta", keep(master match) nogenerate

gen NRVA_District_Code=District_Code
drop PossibleSpellings District_Code

rename Province_Code ProvinceCodeNRVA
order hhid hh_weight ind_weight Resident_Location_Code province districtcode district

gen year=int_year_c
label var year "Year"
order hhid province districtcode district year

rename hh_weight hhweight2011

unique districtcode


save "stata\processed\NRVAData_2011.dta", replace
	
	
*--------------------Aggregate Data by districts



*************************************2005 Data*************************************
use "stata\processed\NRVAData_2005.dta", clear

*Kuchi households are excluded
drop if RuralUrbanKuchi=="Kuchi"
drop if Kuchi_Code=="1"
drop Kuchi_Code
sort districtcode

*n = number of remaining households per district (denominator of the shares below)
bysort districtcode: gen n=_N

*urban = 1 in districts with at least one household recorded as Urban, 0 otherwise
egen urban=max(RuralUrbanKuchi=="Urban"), by(districtcode)


*Columns imported without a usable header get descriptive names
rename O Main_Crops_Garden_7_8
rename  AA Main_Crops_Irrigated_7_13
rename AQ Main_Crops_Rainfed_7_19

*What are the main crops produced in either garden/irrigated land or rainfed land
local maincrops Main_Crops_Garden_7_8, Main_Crops_Irrigated_7_13, Main_Crops_Rainfed_7_19
gen crop1_opium=1 if inlist(19, `maincrops')
gen crop1_wheat=1 if inlist(1, `maincrops')
gen crop1_maize=1 if inlist(2, `maincrops')
gen crop1_barley=1 if inlist(3, `maincrops')
gen crop1_rice=1 if inlist(4, `maincrops')
gen crop1_potato=1 if inlist(5, `maincrops')

*Turn the flags into 0/1 dummies; missing when none of the three crop questions was answered
foreach var of varlist crop1* {
	replace `var'=0 if `var'!=1
	replace `var'=. if Main_Crops_Garden_7_8==. & Main_Crops_Irrigated_7_13==. & Main_Crops_Rainfed_7_19==.
}

*What are your household's income generating activities (out of 6 possible answers that were ranked according to importance)?
* 5.	Prod & sales of Opium and 10.	Opium wage labour 
*(columns G to L hold the six ranked answers)
gen income_opium=0
foreach src in G H I J K L {
	replace income_opium=1 if inlist(`src',5,10)
}
replace income_opium=. if G==. & H==. & I==. & J==. & K==. & L==.

*One row per district: identifiers and size carried over, household dummies summed
collapse (last) province district urban n  (sum) No_Shocks_13_1 Insecurity_13_1 Grew_Opium_13_1 Opium_Eradication_13_1 Theft_13_1 ///
crop1* income_opium , by(districtcode)

*Shares = district sums divided by the number of households in the district
gen insec_share= Insecurity_13_1/n
gen opium1_share= crop1_opium/n
gen opiumshock_share=Grew_Opium_13_1/n
gen incomeopium_share=income_opium/n

gen year=2005

*all shocks refer to period within the last twelve months
label var urban "Is the district urban or rural - 1 for urban" 
label var No_Shocks_13_1 "District-level sum of HH that experienced no shock" 
label var Insecurity_13_1 "District-level sum of HH that experienced an insecurity/violence shock" 
label var Grew_Opium_13_1 "District-level sum of HH that grew opium last season but not this season " 
label var Opium_Eradication_13_1 "District-level sum of HH that experienced a shock caused by opium eradication"
label var Theft_13_1 "District-level sum of HH that experienced theft and/or violence "
label var crop1_opium "District-level sum: opium first important crop"
label var crop1_wheat "District-level sum: wheat first important crop"
label var crop1_maize "District-level sum: maize first important crop"
label var crop1_barley "District-level sum: barley first important crop"
label var crop1_rice "District-level sum: rice first important crop"
label var income_opium "District-level sum: HH receives income from opium activities as one of the six most important income sources (production/sales and wage)"
label var insec_share "District-level share of HH that experienced an insecurity-shock"

save "stata\processed\NRVAData_2005_collapsed.dta", replace


*************************************2007 Data*************************************
use "stata\processed\NRVAData_2007.dta", clear

*Households with urk==3 are excluded
*NOTE(review): presumably the Kuchi category, by analogy with the 2005 section - confirm
drop if urk==3
sort districtcode
unique districtcode

*n = number of remaining households per district (denominator of the shares below)
bysort districtcode: gen n=_N

*urban = 1 in districts with at least one household with urk==1, 0 otherwise
egen urban=max(urk==1), by(districtcode)

*What were the three most important crops you harvested in the last summer (q_4_15) and winter (q_4_19) cultivation season?
*All six answers are checked for every crop. The opium line used to test
*q_4_15_2 twice and never q_4_15_3, so opium named as third summer crop was missed.
local cropanswers q_4_15_1, q_4_15_2, q_4_15_3, q_4_19_1, q_4_19_2, q_4_19_3
gen crop1_opium=1 if inlist(24, `cropanswers')
gen crop1_wheat=1 if inlist(1, `cropanswers')
gen crop1_maize=1 if inlist(2, `cropanswers')
gen crop1_barley=1 if inlist(3, `cropanswers')
gen crop1_rice=1 if inlist(4, `cropanswers')
gen crop1_potato=1 if inlist(13, `cropanswers')

*Turn the flags into 0/1 dummies; missing only when all six crop answers are
*missing (q_4_15_3 was left out of this check before as well)
foreach var of varlist crop1* {
	replace `var'=0 if `var'!=1
	replace `var'=. if q_4_15_1==. & q_4_15_2==. & q_4_15_3==. & q_4_19_1==. & q_4_19_2==. & q_4_19_3==.
}

*What are your household's income generating activities (out of 6 possible answers that were ranked according to importance)?
* 5.	Prod & sales of Opium and 10.	Opium wage labour 
gen income_opium=0
forvalues r=1/6 {
	replace income_opium=1 if inlist(q_8_1_`r',5,10)
}
replace income_opium=. if q_8_1_1==. & q_8_1_2==. & q_8_1_3==. & q_8_1_4==. & q_8_1_5==. & q_8_1_6==.

*Give the shock items the names used for the 2005 data
rename q_13_1_1 No_Shocks_13_1
rename q_13_1_6 Opium_Eradication_13_1
rename q_13_1_7 Grew_Opium_13_1
rename q_13_1_9 Insecurity_13_1
rename q_13_1_29 Theft_13_1 


*One row per district: identifiers and size carried over, household dummies summed
collapse (last) province district urban n  (sum)  No_Shocks_13_1 Insecurity_13_1 Grew_Opium_13_1 Opium_Eradication_13_1 Theft_13_1 ///
crop1* income_opium , by(districtcode)

*Shares = district sums divided by the number of households in the district
gen insec_share= Insecurity_13_1/n
gen opium1_share= crop1_opium/n
gen opiumshock_share=Grew_Opium_13_1/n
gen incomeopium_share=income_opium/n

gen year=2007


*all shocks refer to period within the last twelve months
label var urban "Is the district urban or rural - 1 for urban" 
label var No_Shocks_13_1 "District-level sum of HH that experienced no shock" 
label var Insecurity_13_1 "District-level sum of HH that experienced an insecurity/violence shock" 
label var Grew_Opium_13_1 "District-level sum of HH that grew opium last season but not this season " 
label var Opium_Eradication_13_1 "District-level sum of HH that experienced a shock caused by opium eradication"
label var Theft_13_1 "District-level sum of HH that experienced theft and/or violence "
label var crop1_opium "District-level sum: opium first important crop"
label var crop1_wheat "District-level sum: wheat first important crop"
label var crop1_maize "District-level sum: maize first important crop"
label var crop1_barley "District-level sum: barley first important crop"
label var crop1_rice "District-level sum: rice first important crop"
label var income_opium "District-level sum: HH receives income from opium activities as one of the six most important income sources (production/sales and wage)"
label var insec_share "District-level share of HH that experienced an insecurity-shock"


save "stata\processed\NRVAData_2007_collapsed.dta", replace


*************************************2011 Data*************************************
use "stata\processed\NRVAData_2011.dta", clear

*Households with Resident_Location_Code==3 are excluded
*NOTE(review): presumably the Kuchi category, by analogy with the 2005 section - confirm
drop if Resident_Location_Code==3
sort districtcode
unique districtcode

*n = number of remaining rows per district
bysort districtcode: gen n=_N


*urban = 1 in districts with at least one row with Resident_Location_Code==1, 0 otherwise
gen urbandummy=1 if Resident_Location_Code==1
gen urban=0
bysort districtcode: egen urban1=max(urbandummy)
replace urban=urban1 if urban1==1
drop urban1


*Crop dummies from the eight crop answers Q_6_10 to Q_6_32 (opium has code 10 here)
*NOTE(review): the original comment referred to the 2007 questions q_4_15 and
*q_4_19 - confirm the wording of the 2011 questions
local cropanswers Q_6_10, Q_6_12, Q_6_14, Q_6_22, Q_6_24, Q_6_26, Q_6_30, Q_6_32
gen crop1_opium=1 if inlist(10, `cropanswers')
gen crop1_wheat=1 if inlist(1, `cropanswers')
gen crop1_maize=1 if inlist(2, `cropanswers')
gen crop1_barley=1 if inlist(3, `cropanswers')
gen crop1_rice=1 if inlist(4, `cropanswers')
gen crop1_potato=1 if inlist(13, `cropanswers')

*Turn the flags into 0/1 dummies; missing only when all eight crop answers are
*missing. The last term used to read Q_6_32==10, which never flagged
*non-response and instead blanked households whose only answer was opium in Q_6_32.
foreach var of varlist crop1* {
	replace `var'=0 if `var'!=1
	replace `var'=. if Q_6_10==. & Q_6_12==. & Q_6_14==. & Q_6_22==. & Q_6_24==. & Q_6_26==. & Q_6_30==. & Q_6_32==.
}

*Household income generating activities: answers Q_9_1, Q_9_3, Q_9_5 and Q_9_7, codes 5 and 2
*NOTE(review): the comment copied from the earlier waves names codes 5 (Prod &
*sales of Opium) and 10 (Opium wage labour), whereas the code tests 5 and 2 -
*confirm against the 2011 code list
gen income_opium=0
foreach src in Q_9_1 Q_9_3 Q_9_5 Q_9_7 {
	replace income_opium=1 if inlist(`src',5,2)
}
*Missing only when all four answers are missing (the last term used to read Q_9_7==2)
replace income_opium=. if Q_9_1==. & Q_9_3==. & Q_9_5==. & Q_9_7==.

gen No_Shocks_13_1=0
replace No_Shocks_13_1=1 if Q_13_1_a==2 & Q_13_1_b==2 & Q_13_1_c==2 & Q_13_1_d==2 & Q_13_1_e==2 & Q_13_1_f==2 & Q_13_1_g==2 & Q_13_1_h==2 & Q_13_1_i==2 & ///
Q_13_1_j==2 & Q_13_1_k==2 & Q_13_1_l==2 & Q_13_1_m==2 & Q_13_1_n==2 & Q_13_1_o==2 & Q_13_1_p==2 & Q_13_1_q==2 & Q_13_1_r==2 & Q_13_1_s==2 & Q_13_1_t==2 & ///
 Q_13_1_u==2 & Q_13_1_v==2 & Q_13_1_w==2 & Q_13_1_x==2 & Q_13_1_y==2 & Q_13_1_z==2 & Q_13_1_aa==2 & Q_13_1_ab==2 & Q_13_1_ac==2  & Q_13_1_ad==2
/*shock variable is defined: 1 yes and 2 no shock*/
 
rename Q_13_1_e Opium_Eradication_13_1
rename Q_13_1_f Grew_Opium_13_1 /*in 2007 it's called grew opoium last season but not this season */
rename Q_13_1_h Insecurity_13_1
rename Q_13_1_ab Theft_13_1 /*in 2007 survey it's called theft and violence*/

foreach var in Opium_Eradication_13_1 Insecurity_13_1 Grew_Opium_13_1 Theft_13_1 {
	replace `var'=0 if `var'==2
} 

collapse (last) province district Resident_Location_Code urban n (sum) No_Shocks_13_1 Insecurity_13_1 Grew_Opium_13_1 Opium_Eradication_13_1 Theft_13_1 ///
crop1* income_opium , by(districtcode)
bysort districtcode: gen insec_share= Insecurity_13_1/n
bysort districtcode: gen opium1_share= crop1_opium/n
bysort districtcode: gen opiumshock_share=Grew_Opium_13_1/n
bysort districtcode: gen incomeopium_share=income_opium/n

gen year=2011

label var urban "Is the district urban or rural - 1 for urban" 
label var No_Shocks_13_1 "District-level sum of HH that experienced no shock" 
label var Insecurity_13_1 "District-level sum of HH that experienced an insecurity/violence shock" 
label var Grew_Opium_13_1 "District-level sum of HH that grew opium last season but not this season " 
label var Opium_Eradication_13_1 "District-level sum of HH that experienced a shock caused by opium eradication"
label var Theft_13_1 "District-level sum of HH that experienced theft and/or violence "
label var crop1_opium "District-level sum: opium first important crop"
label var crop1_wheat "District-level sum: wheat first important crop"
label var crop1_maize "District-level sum: maize first important crop"
label var crop1_barley "District-level sum: barley first important crop"
label var crop1_rice "District-level sum: rice first important crop"
label var income_opium "District-level sum: HH receives income from opium activities as one of the six most important income sources (production/sales and wage)"
label var insec_share "District-level share of HH that experienced an insecurity-shock"

unique districtcode

save "stata\processed\NRVAData_2011_collapsed.dta", replace

********************************************************************************


*---------------------------2005 Data

use "stata\processed\NRVAData_2005.dta", clear


*Farmers/Income activities
*What are your household's income generating activities (out of 6 possible answers that were ranked according to importance)?
* G, H, I, J, K and L hold the six ranked answers. Each dummy below starts at 0, is
* switched to 1 as soon as one answer carries a matching code, and is finally reset to
* missing for households with no answer at all.

* Agricultural income: activity codes 1-7 and 9-12.
gen agrincome = 0
foreach src of varlist G H I J K L {
	replace agrincome = 1 if inlist(`src', 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12)
}
replace agrincome = . if G==. & H==. & I==. & J==. & K==. & L==.

*Opium business:  5.	Prod & sales of Opium and 10.	Opium wage labour
gen opiumincome = 0
foreach src of varlist G H I J K L {
	replace opiumincome = 1 if inlist(`src', 5, 10)
}
replace opiumincome = . if G==. & H==. & I==. & J==. & K==. & L==.

*Income from any other source than agriculture (employment not pensions): codes 13-24
gen otheremployment = 0
foreach src of varlist G H I J K L {
	replace otheremployment = 1 if inlist(`src', 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)
}
replace otheremployment = . if G==. & H==. & I==. & J==. & K==. & L==.
	
		

*Assets

*overlap of all 3 waves: Radio/Tape, Refrigerator, TV, VCR/DVD, Sewing Machine, Thuraya (take any phone), Bicycle, Motorcycle, Tractor/Thresher, Car
* Tractor_Thresher: 1 if the HH owns a tractor or a combine thresher, 0 if it owns
* neither, missing otherwise.
gen Tractor_Thresher=1 if Tractor_5_1==1 | Combine_Thresher_5_1==1
replace Tractor_Thresher=0 if Tractor_5_1==0 & Combine_Thresher_5_1==0

* Unweighted count of the ten overlapping assets (missing only if all ten are missing).
egen sumassets=rowtotal( Radio_5_1 Refrigerator_5_1 TV_5_1 VCR_5_1 ///
Sewing_Machine_5_1  Thuraya_5_1 Bicycle_5_1 Motorcycle_5_1 Tractor_Thresher Car_5_1 ), missing

*Following Filmer and Scott (2008, pp. 15-16) - see diploma thesis: asset is weighted by the proportion of households not possessing the specific item
* weight = 1 - (share of households with a non-missing answer that own the item).
* Fix: the denominator used to be max(_n) over non-missing rows, i.e. the row number of
* the last non-missing observation rather than the number of non-missing observations;
* egen mean() divides by the non-missing count. Only the ten items that enter the index
* need a weight.
foreach X of varlist Radio_5_1 Refrigerator_5_1 TV_5_1 VCR_5_1 Sewing_Machine_5_1 ///
	Thuraya_5_1 Bicycle_5_1 Motorcycle_5_1 Tractor_Thresher Car_5_1 {
	egen own_`X'=mean(`X')
	gen weight_`X'=1-own_`X'
	drop own_`X'
}

* Weighted index; missing as soon as one of the ten items is missing for the household.
gen sumassets_weighted=weight_Radio_5_1* Radio_5_1 + weight_Refrigerator_5_1* Refrigerator_5_1 ///
+ weight_TV_5_1*TV_5_1+ weight_VCR_5_1 *VCR_5_1+ weight_Sewing_Machine_5_1*Sewing_Machine_5_1 + weight_Thuraya_5_1 * Thuraya_5_1 +  weight_Bicycle_5_1 *Bicycle_5_1 + weight_Motorcycle_5_1 *Motorcycle_5_1 ///
+ weight_Tractor_Thresher*Tractor_Thresher + weight_Car_5_1 * Car_5_1

* Remove the helpers so that only sumassets and sumassets_weighted remain from this step.
drop weight* Tractor_Thresher



*economic situation: subjective indicator (women's assessment)
generate economicimprove = Compare_Overall_Economic_15_17

* Keep the identifier range plus everything created from agrincome to economicimprove.
* The ranges are positional, so this relies on the creation order of the covariates.
keep hhid-Cluster_No agrincome-economicimprove

* Wave-specific household id; survey design identifiers are not needed any further.
rename hhid hhid2005
drop Household_Code Cluster_Code HH_No RuralUrbanKuchi Cluster_No

label variable agrincome "Dummy=1 if HH receives any income (irrespective of order of importance) from agricultural sector including opium and livestock, inlcuding production for home consumption (in 2005 and 2007 survey)"
label variable opiumincome "Dummy=1 if HH received any income from opium production or sales or opium wage labour"
label variable otheremployment "Dummy=1 if HH receives income from another employment (no agriculture, no pensions etc.)"
label variable sumassets "Sum of assets (the overlap of the 3 waves): Radio/Tape, Refrigerator, TV, VCR/DVD, Sewing Machine, Thuraya (take any phone), Bicycle, Motorcycle, Tractor/Thresher, Car, max 10"
label variable sumassets_weighted "Weighted sum of assets as defined in sumassets, weighted by the proportion of households not possessing the specific item following Filmer and Scott (2008, pp. 15–16)"
label variable economicimprove "How do you compare the overall economic situation of the HH with 1 year ago? 1 much worse, 2 slightly worse, 3 same, 4 slightly better, 5 much better"

save "stata\processed\covariates2005.dta", replace


*----------------------------------2007/08 Data

use "stata\processed\NRVAData_2007.dta", clear


*Farmers/Income activities
*What are your household's income generating activities (out of 6 possible answers that were ranked according to importance)?
* q_8_1_1 ... q_8_1_6 hold the six ranked answers. Each dummy below starts at 0, is
* switched to 1 as soon as one answer carries a matching code, and is finally reset to
* missing for households with no answer at all.

* Agricultural income: activity codes 1-7 and 9-12.
gen agrincome = 0
foreach src of varlist q_8_1_1 q_8_1_2 q_8_1_3 q_8_1_4 q_8_1_5 q_8_1_6 {
	replace agrincome = 1 if inlist(`src', 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12)
}
replace agrincome = . if q_8_1_1==. & q_8_1_2==. & q_8_1_3==. & q_8_1_4==. & q_8_1_5==. & q_8_1_6==.

*Opium business:  5.	Prod & sales of Opium and 10.	Opium wage labour
gen opiumincome = 0
foreach src of varlist q_8_1_1 q_8_1_2 q_8_1_3 q_8_1_4 q_8_1_5 q_8_1_6 {
	replace opiumincome = 1 if inlist(`src', 5, 10)
}
replace opiumincome = . if q_8_1_1==. & q_8_1_2==. & q_8_1_3==. & q_8_1_4==. & q_8_1_5==. & q_8_1_6==.

*Income from any other source than agriculture (employment not pensions): codes 13-24
gen otheremployment = 0
foreach src of varlist q_8_1_1 q_8_1_2 q_8_1_3 q_8_1_4 q_8_1_5 q_8_1_6 {
	replace otheremployment = 1 if inlist(`src', 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)
}
replace otheremployment = . if q_8_1_1==. & q_8_1_2==. & q_8_1_3==. & q_8_1_4==. & q_8_1_5==. & q_8_1_6==.



*Assets
*overlap of all 3 waves: Radio/Tape, Refrigerator, TV, VCR/DVD, Sewing Machine, Thuraya (take any phone), Bicycle, Motorcycle, Tractor/Thresher, Car

* Cap every asset item at 1 so that the items are ownership dummies.
* Fix: this step used to run after phone/tractorthresher were built, so a value above 1
* in one of their components matched neither ==0 nor ==1 there.
foreach X of varlist q_5_1_1- q_5_7_4 {
	replace `X'=1 if `X'>=1 & `X'!=.
}

* phone: 1 if either phone item is owned, 0 if neither is, missing otherwise.
gen phone=0 if q_5_1_8==0 & q_5_7_1==0
replace phone=1 if q_5_1_8==1 | q_5_7_1==1

* tractorthresher: 1 if either item is owned, 0 if neither is, missing otherwise.
* Fix: the "owned" condition used & (both items required), unlike phone above and unlike
* the 2005 wave (Tractor_5_1==1 | Combine_Thresher_5_1==1), which left households owning
* only one of the two items with a missing value.
gen tractorthresher=0 if q_5_1_12==0 & q_5_1_13==0
replace tractorthresher=1 if q_5_1_12==1 | q_5_1_13==1

* Unweighted count of the ten overlapping assets (missing only if all ten are missing).
egen sumassets=rowtotal(q_5_1_1 q_5_1_3 q_5_1_5 q_5_1_6 q_5_1_7 q_5_1_9 q_5_1_10 q_5_1_11 phone tractorthresher ), missing


*Following Filmer and Scott (2008, pp. 15-16) - see diploma thesis: asset is weighted by the proportion of households not possessing the specific item
* weight = 1 - (share of households with a non-missing answer that own the item).
* Fix: the denominator used to be max(_n) over non-missing rows, i.e. the row number of
* the last non-missing observation rather than the number of non-missing observations;
* egen mean() divides by the non-missing count. Only the ten items that enter the index
* need a weight.
foreach X of varlist q_5_1_1 q_5_1_3 q_5_1_5 q_5_1_6 q_5_1_7 q_5_1_9 q_5_1_10 q_5_1_11 phone tractorthresher {
	egen own_`X'=mean(`X')
	gen weight_`X'=1-own_`X'
	drop own_`X'
}

* Weighted index; missing as soon as one of the ten items is missing for the household.
gen sumassets_weighted=weight_q_5_1_1 * q_5_1_1 + weight_q_5_1_3 * q_5_1_3 + weight_q_5_1_5 *q_5_1_5 + weight_q_5_1_6 * q_5_1_6 + ///
weight_q_5_1_7 *q_5_1_7 + weight_q_5_1_9 * q_5_1_9 + weight_q_5_1_10 * q_5_1_10 + weight_q_5_1_11 * q_5_1_11 +  weight_phone * phone + weight_tractorthresher * tractorthresher

* phone and tractorthresher stay in the data; only the weights are removed.
drop weight*


*economic situation: subjective indicator (women's assessment)
generate economicimprove = q_16_7

* Keep the identifier range plus everything created from agrincome to economicimprove.
* The ranges are positional, so this relies on the creation order of the covariates.
keep hhid-stratum agrincome-economicimprove

* Wave-specific household id; cid and stratum are not needed any further.
rename hhid hhid2007
drop cid stratum

label variable agrincome "Dummy=1 if HH receives any income (irrespective of order of importance) from agricultural sector including opium and livestock, inlcuding production for home consumption (in 2005 and 2007 survey)"
label variable opiumincome "Dummy=1 if HH received any income from opium production or sales or opium wage labour"
label variable otheremployment "Dummy=1 if HH receives income from another employment (no agriculture, no pensions etc.)"
label variable sumassets "Sum of assets (the overlap of the 3 waves): Radio/Tape, Refrigerator, TV, VCR/DVD, Sewing Machine, Thuraya (take any phone), Bicycle, Motorcycle, Tractor/Thresher, Car, max 10"
label variable sumassets_weighted "Weighted sum of assets as defined in sumassets, weighted by the proportion of households not possessing the specific item following Filmer and Scott (2008, pp. 15–16)"
label variable economicimprove "How do you compare the overall economic situation of the HH with 1 year ago? 1 much worse, 2 slightly worse, 3 same, 4 slightly better, 5 much better"

save "stata\processed\covariates2007.dta", replace


*--------------------------------2011/12 Data
use "stata\processed\NRVAData_2011.dta", clear

*Farmers/Income activities
*What are your household's income generating activities (ranked according to importance)?
* Q_9_1, Q_9_3, Q_9_5 and Q_9_7 hold the ranked answers used here. Each dummy below
* starts at 0, is switched to 1 as soon as one answer carries a matching code, and is
* finally reset to missing for households with no answer at all.

* Agricultural income: activity codes 1-7.
gen agrincome = 0
foreach src of varlist Q_9_1 Q_9_3 Q_9_5 Q_9_7 {
	replace agrincome = 1 if inlist(`src', 1, 2, 3, 4, 5, 6, 7)
}
replace agrincome = . if Q_9_1==. & Q_9_3==. & Q_9_5==. & Q_9_7==.

*Opium business: codes 2 and 5 are matched in this wave
* NOTE(review): the 2005/2007 sections use codes 5 and 10 for opium -- confirm that
* 2 and 5 are the opium codes of the 2011 questionnaire.
gen opiumincome = 0
foreach src of varlist Q_9_1 Q_9_3 Q_9_5 Q_9_7 {
	replace opiumincome = 1 if inlist(`src', 2, 5)
}
replace opiumincome = . if Q_9_1==. & Q_9_3==. & Q_9_5==. & Q_9_7==.

*Income from any other source than agriculture (employment not pensions)
* Matched codes: 11-17, 21-30, 41-43, 51 and 52.
gen otheremployment = 0
foreach src of varlist Q_9_1 Q_9_3 Q_9_5 Q_9_7 {
	replace otheremployment = 1 if inlist(`src', 11, 12, 13, 14, 15, 16, 17, ///
		21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 41, 42, 43, 51, 52)
}
replace otheremployment = . if Q_9_1==. & Q_9_3==. & Q_9_5==. & Q_9_7==.


*Assets
*overlap of all 3 waves: Radio/Tape, Refrigerator, TV, VCR/DVD, Sewing Machine, Thuraya (take any phone), Bicycle, Motorcycle, Tractor/Thresher, Car
* Cap the ten asset items at 1 so that they are ownership dummies.
foreach X of varlist Q_7_1_Refrigerator Q_7_1_Sewing_Machine Q_7_1_Radio Q_7_1_TV Q_7_1_VCR Q_7_1_Satellite_Phone Q_7_1_Bicycle Q_7_1_Motorcycle Q_7_1_Car Q_7_1_Tractor {
	replace `X'=1 if `X'>=1 & `X'!=.
}

* Unweighted count of the ten assets (missing only if all ten are missing).
egen sumassets=rowtotal(Q_7_1_Refrigerator Q_7_1_Sewing_Machine Q_7_1_Radio Q_7_1_TV Q_7_1_VCR Q_7_1_Satellite_Phone Q_7_1_Bicycle Q_7_1_Motorcycle Q_7_1_Car Q_7_1_Tractor ), missing


*Following Filmer and Scott (2008, pp. 15-16) - see diploma thesis: asset is weighted by the proportion of households not possessing the specific item
* weight = 1 - (share of households with a non-missing answer that own the item).
* Fix: the denominator used to be max(_n) over non-missing rows, i.e. the row number of
* the last non-missing observation rather than the number of non-missing observations;
* egen mean() divides by the non-missing count.
foreach X of varlist Q_7_1_Refrigerator Q_7_1_Sewing_Machine Q_7_1_Radio Q_7_1_TV Q_7_1_VCR Q_7_1_Satellite_Phone Q_7_1_Bicycle Q_7_1_Motorcycle Q_7_1_Car Q_7_1_Tractor {
	egen own_`X'=mean(`X')
	gen weight_`X'=1-own_`X'
	drop own_`X'
}

* Weighted index; missing as soon as one of the ten items is missing for the household.
gen sumassets_weighted=weight_Q_7_1_Refrigerator * Q_7_1_Refrigerator + weight_Q_7_1_Sewing_Machine * Q_7_1_Sewing_Machine + weight_Q_7_1_Radio *Q_7_1_Radio + weight_Q_7_1_TV * Q_7_1_TV + ///
weight_Q_7_1_VCR *Q_7_1_VCR + weight_Q_7_1_Satellite_Phone * Q_7_1_Satellite_Phone + weight_Q_7_1_Bicycle * Q_7_1_Bicycle + weight_Q_7_1_Motorcycle * Q_7_1_Motorcycle + ///
weight_Q_7_1_Car * Q_7_1_Car + weight_Q_7_1_Tractor * Q_7_1_Tractor

drop weight*

*economic situation: subjective indicator (women's assessment)
generate economicimprove = Q_13_7

* Keep the identifier range plus everything created from agrincome to economicimprove.
* The ranges are positional, so this relies on the creation order of the covariates.
keep hhid-Shura_id agrincome-economicimprove

* Wave-specific household id; the remaining survey identifiers are not needed any further.
rename hhid hhid2011
drop ind_weight Resident_Location_Code ProvinceCodeNRVA ProvinceNameNRVA Shura_id

label variable agrincome "Dummy=1 if HH receives any income (irrespective of order of importance) from agricultural sector including opium and livestock, inlcuding production for home consumption (in 2005 and 2007 survey)"
label variable otheremployment "Dummy=1 if HH receives income from another employment (no agriculture, no pensions etc.)"
label variable opiumincome "Dummy=1 if HH received any income from opium production or sales or opium wage labour"
label variable sumassets "Sum of assets (the overlap of the 3 waves): Radio/Tape, Refrigerator, TV, VCR/DVD, Sewing Machine, Thuraya (take any phone), Bicycle, Motorcycle, Tractor/Thresher, Car, max 10"
label variable sumassets_weighted "Weighted sum of assets as defined in sumassets, weighted by the proportion of households not possessing the specific item following Filmer and Scott (2008, pp. 15–16)"
label variable economicimprove "How do you compare the overall economic situation of the HH with 1 year ago? 1 much worse, 2 slightly worse, 3 same, 4 slightly better, 5 much better"

save "stata\processed\covariates2011.dta", replace


*-------------------------Append data
* Stack the three wave-specific covariate files into one household-level file.
use "stata\processed\covariates2005.dta", clear
append using "stata\processed\covariates2007.dta"
append using "stata\processed\covariates2011.dta"

* tractorthresher exists only as a 2007 helper; phone is kept and labelled.
drop tractorthresher
label variable phone "Indicates whether HH has any type of phone"

* Common household id, taken from the wave-specific id that matches the survey year.
* NOTE(review): if the wave ids are large numeric values, the default storage type of
* generate may not hold them exactly -- confirm the id types.
generate hhid = hhid2005 if year==2005
replace hhid = hhid2007 if inlist(year, 2007, 2008)
replace hhid = hhid2011 if inlist(year, 2011, 2012)

unique districtcode


save "stata\processed\covariates.dta", replace



*------------------------------------2005 Data


*process price dataset
import excel "stata\rawdata\NRVA data\NRVA 2005\District_Price.xlsx", sheet("District_Price") firstrow clear

* The listed raw entries of these string price columns are blanked (set to ".") before
* the columns are converted to numbers further below.
replace Vegetable_Oil = "." if Vegetable_Oil == "5 0"
replace Honey = "." if Honey == "4 0"

foreach junk in "7 0" "000" "040" "050" "060" "066" "070" "075" "080" "085" "090" {
	replace Krut = "." if Krut == "`junk'"
}

foreach junk in "4 0" "000" "020" "030" "060" "080" "090" {
	replace Fish = "." if Fish == "`junk'"
}

foreach junk in "0 0" "000" "014" "090" "1 5" {
	replace Beef = "." if Beef == "`junk'"
}

foreach junk in "048" "045" "040" "025" "023" "020" "0" "000" "4 0" "5 0" "6 0" ///
	"050" "052" "054" "055" "060" "065" "070" "078" "080" "085" "090" "095" {
	replace Raisins = "." if Raisins == "`junk'"
}

foreach junk in "048" "040" "025" "020" "026" "000" "4 0" ///
	"030" "035" "036" "055" "050" "060" "080" "085" {
	replace Dried_Apricots = "." if Dried_Apricots == "`junk'"
}

foreach junk in "060" "052" "050" "045" "035" "030" "0" "000" "4 0" "6 0" ///
	"065" "070" "080" "085" {
	replace Walnuts = "." if Walnuts == "`junk'"
}

* A price of "0" is treated as missing; afterwards each column is converted to numeric.
foreach pricevar of varlist Barley- Impoted_Wheat_Flour_Harvest {
	replace `pricevar' = "." if `pricevar' == "0"
	destring `pricevar', replace
}

* Wheat flour and rice: row mean over the local and imported price quotes.
egen Wheat_Flour = rowmean(Wheat_Flour_Local_First_Price Wheat_Flour_Local_Second_Price Wheat_Flour_Local_Third_Price ///
	Wheat_Flour_Import_First_Price Wheat_Flour_Import_Second_Pric Wheat_Flour_Import_Third_Price)

egen Rice = rowmean(Rice_Local_First_Price Rice_Local_Second_Price Rice_Local_Third_Price ///
	Rice_Imported_First_Price Rice_Imported_Second_Price Rice_Imported_Third_Price)

* Average price per district and item.
collapse (mean) Wheat_Flour Rice Barley Maize Potato Sweet_Potato Spinach Wild_Leaves Broccoli Leek Carrots Cucumber Okra Eggplant Cauliflower Tomato Radish Turnip Cabbage ///
Coriander Mint Onion Hot_Pepper Dried_Tomato Dried_Vegetable Peach Mulberry Apricot Pomegranate Orange Apple Grapes Water_Melon_Cantelope Plum Raisins Dried_Apricots  ///
Dry_Mulberry Beans Mung_Beans Chick_Peas Lentils Walnuts Pistachio Almonds Beef Chicken Lamb Goat Liver Fish Dried_Meat Eggs Milk Yogurt Dogh Cheese Krut ///
Vegetable_Oil Ghee Animal_Fat Butter Sugar Honey Brown_Sugar, by(District_Code)

* Name of the merge key in the household file.
rename District_Code NRVA_District_Code

save "stata\processed\NRVA_Prices_2005", replace

* Weekly household food expenditure 2005: household quantities (*_15_6) are valued
* with district prices, falling back to the province median and then the national median.
use "stata\processed\NRVAData_2005.dta", clear

* Attach the district prices; only households with a matching price record are kept.
merge m:1 NRVA_District_Code using "stata\processed\NRVA_Prices_2005"
drop if _merge!=3
drop _merge



*****************************************************************************************
*********Food Consumption Expenditure
*****************************************************************************************

	* in the question on food I only take
	* 1. Purchase 2. Own production 3. Bartered/Payment in kind 4. Borrowed/taken on credit 5. Received as gift 6. Food aid 7. Other	/*Other is only an option in 2007, 2011 */
	*Following Deaton & Zaidi, include food from all sources

local foodq Wheat_Flour_15_6 Rice_15_6 Barley_15_6 Maize_15_6 Beans_15_6 Mung_15_6 Chick_Peas_15_6 Lentils_15_6 Vegetable_Oil_15_6 ///
Ghee_15_6 Milk_15_6 Dogh_15_6 Yogurt_15_6 Sugar_15_6 Brown_Sugar_15_6 Honey_15_6 Dried_Tomato_15_6 Dried_Vegetable_15_6 Raisins_15_6 ///
Fresh_Mulberries_15_6 Dried_Mulberries_15_6 Walnuts_15_6 Pistachio_15_6 Almonds_15_6 Pasta_Macaroni_15_6 Beef_15_6 Lamb_15_6 Goat_15_6 ///
Dried_Meat_15_6 Liver_15_6 Chicken_15_6 Fish_15_6 Butter_15_6 Animal_Fat_15_6 Krut_15_6 Cheese_15_6 Potato_15_6 Sweet_Potato_15_6 Onion_15_6 ///
Tomato_15_6 Okra_15_6 Spinach_15_6 Cauliflower_15_6 Eggplant_15_6 Carrots_15_6 Pumpkin_15_6 Cucumber_15_6 Turnip_15_6 Radish_15_6 Cabbage_15_6 ///
Leek_15_6 Broccoli_15_6 Hot_Pepper_15_6 Wild_Leaves_15_6 Apple_15_6 Grapes_15_6 Melon_Water_Melon_15_6 Peach_15_6 Fresh_Apricot_15_6 ///
Dried_Appricot_15_6 Orange_15_6 Pomegranate_15_6 Plum_15_6 Pear_15_6 Banana_15_6 Purchased_Nan_15_6 Egg_No_15_6

* Fix: remember how many quantities the household actually reported BEFORE missing
* quantities are set to zero. The former check "all *_15_6 == ." was placed after the
* zero-fill and could therefore never be true.
egen n_food_reported=rownonmiss(`foodq')

* Unreported items count as zero consumption.
foreach X of varlist `foodq' {
	replace `X'=0 if `X'==.
}

* Align the price variable names with the stems of the quantity variables.
rename Mung_Beans Mung
rename Eggs Egg_No
rename Water_Melon_Cantelope Melon_Water_Melon 
rename Apricot Fresh_Apricot


rename Mulberry Fresh_Mulberries 
rename Dry_Mulberry Dried_Mulberries

/*no price data for: 
Pasta_Macaroni 
Pumpkin
Pear
Banana
Purchased_Nan
*/

rename Dried_Appricot_15_6 Dried_Apricot_15_6
rename Dried_Apricots Dried_Apricot

* Median prices at province (_p), national (_n) and district (_d) level.
* The three loops are kept separate on purpose: later code addresses the medians
* through positional ranges (e.g. Wheat_Flour_p- Brown_Sugar_p), which requires all
* _p variables to be created before the _n and the _d variables.
sort province
foreach X of varlist Wheat_Flour- Brown_Sugar {
	bysort province: egen `X'_p=median(`X')
}

foreach X of varlist Wheat_Flour- Brown_Sugar {	
	egen `X'_n=median(`X')
}

sort districtcode
foreach X of varlist Wheat_Flour- Brown_Sugar {	
	bysort districtcode: egen `X'_d=median(`X')
}	

* Item expenditure = price * quantity; a missing district price is replaced by the
* province median and, if that is missing too, by the national median.
foreach X of varlist Wheat_Flour Rice Barley Maize Beans Mung Chick_Peas Lentils Vegetable_Oil ///
Ghee Milk Dogh Yogurt Sugar Brown_Sugar Honey Dried_Tomato Dried_Vegetable Raisins ///
Walnuts Pistachio Almonds  Beef Lamb Goat ///
Dried_Meat Liver Chicken Fish Butter Animal_Fat Krut Cheese Potato Sweet_Potato Onion ///
Tomato Okra Spinach Cauliflower Eggplant Carrots  Cucumber Turnip Radish Cabbage ///
Leek Broccoli Hot_Pepper Wild_Leaves Apple Grapes Melon_Water_Melon Peach Fresh_Apricot  ///
Dried_Apricot Fresh_Mulberries Dried_Mulberries Orange Pomegranate Plum  Egg_No {
	gen exp`X'=`X'*`X'_15_6
	replace exp`X'=`X'_p*`X'_15_6 if exp`X'==.
	replace exp`X'=`X'_n*`X'_15_6 if exp`X'==.
}

* NOTE(review): exp* also matches any pre-existing variable whose name starts with
* "exp" -- confirm that the household file has none.
egen HHexpenditure_perweek2005=rowtotal(exp*), missing

* Households that reported no food quantity at all get a missing total instead of 0.
replace HHexpenditure_perweek2005=. if n_food_reported==0

drop exp* n_food_reported


* Harmonise the 2005 variable names with the other waves and save the food file.
rename Pasta_Macaroni_15_6 Pasta_15_6


*prepare to merge together with other waves to get expenditure in constant price
* Quantities: <Item>_15_6 becomes amount_<item> (all lower case).
foreach X of varlist Wheat_Flour Rice Barley Maize Beans Mung Chick_Peas Lentils Vegetable_Oil ///
Ghee Milk Dogh Yogurt Sugar Brown_Sugar Honey Dried_Tomato Dried_Vegetable Raisins ///
Fresh_Mulberries Dried_Mulberries Walnuts Pistachio Almonds Beef Lamb Goat ///
Dried_Meat Liver Chicken Fish Butter Animal_Fat Krut Cheese Potato Sweet_Potato Onion ///
Tomato Okra Spinach Cauliflower Eggplant Carrots Cucumber Turnip Radish Cabbage ///
Leek Broccoli Hot_Pepper Wild_Leaves Apple Grapes Melon_Water_Melon Peach Fresh_Apricot ///
Dried_Apricot Orange Pomegranate Plum  Egg_No {
	rename `X'_15_6 amount_`X'
	rename amount_`X', lower
}

* Item names that differ from the loop result.
* (The former "rename amount_wheat_flour amount_wheat_flour" renamed the variable to
* itself and has been removed.)
rename amount_melon_water_melon amount_melon
rename Pasta_15_6 amount_pasta
rename Pumpkin_15_6 amount_pumpkin
rename Pear_15_6 amount_pear
rename Purchased_Nan_15_6 amount_purchased_nan
rename amount_egg_no amount_eggs
rename Banana_15_6 amount_banana
rename amount_milk amount_milk_fresh
rename amount_sugar amount_white_sugar
rename amount_dried_vegetable amount_dried_vegetables
rename amount_lamb amount_mutton
rename amount_dried_apricot  amount_dried_apricots 	
rename amount_dried_tomato amount_dried_tomatoes	
*At the end have to replace 0 total consumption with missing if all items are missing 


* Median prices: append the wave suffix and convert to lower case, in the order
* district (_d), province (_p), national (_n).
foreach lvl in d p n {
	foreach X of varlist Wheat_Flour_`lvl'- Brown_Sugar_`lvl' {
		rename `X' `X'2005
		rename `X'2005, lower
	}
}

* Egg medians are named eggs_* to match amount_eggs.
* Fix: these renames used to run BEFORE the suffix loops; as the egg medians lie inside
* the positional ranges Wheat_Flour_?- Brown_Sugar_?, the loops appended "2005" a second
* time and produced eggs_d20052005, eggs_p20052005 and eggs_n20052005.
rename egg_no_d2005 eggs_d2005
rename egg_no_p2005 eggs_p2005
rename egg_no_n2005 eggs_n2005

gen fooddrinksoutside=Food_Drinks_Consumed_10_11
label var fooddrinksoutside "HH expenditure for Food & drinks consumed outside the home, in current prices"


* The range wheat_flour_p2005 - HHexpenditure_perweek2005 is positional: it covers the
* province, national and district medians and the weekly expenditure total.
keep hhid hhweight province districtcode district year amount*  wheat_flour_p2005 - HHexpenditure_perweek2005 fooddrinksoutside

rename hhid hhid2005



save "stata\processed\foodconsumption_2005.dta", replace
		
	
*------------------------------------2007 Data

* Build the 2007 district price file: one row per district, one column per food
* item, with each column named after its (cleaned) variable label.
use "stata\processed\District_Price.dta", clear

* Strip "Price of" and "-Kg" from the labels. Note that subinstr without the
* "all" option only replaces the first match, so only the first blank is removed
* here; multi-word labels are set by hand further below.
foreach pvar of varlist a_p_3 a_p_4 a_p_5 a_p_6 a_p_7 a_p_8 a_p_9 a_p_10 a_p_12 a_p_13 a_p_14 a_p_15 a_p_16 a_p_17 a_p_18 a_p_19 a_p_21 a_p_22 a_p_23 a_p_24 ///
a_p_25 a_p_26 a_p_27 a_p_28 a_p_29 a_p_30 a_p_32 a_p_34 a_p_35 a_p_36 a_p_37 a_p_38 a_p_39 a_p_40 a_p_41 a_p_42 a_p_43 a_p_44 a_p_45 a_p_46 a_p_47 a_p_48 ///
a_p_49 a_p_50 a_p_51 a_p_52 a_p_53 a_p_54 a_p_55 a_p_56 a_p_57 a_p_58 a_p_60 a_p_61 a_p_62 a_p_63 a_p_64 a_p_65 a_p_66 a_p_67 a_p_68 a_p_69 a_p_70 a_p_71 ///
a_p_72 a_p_73 a_p_74 a_p_75 a_p_76 a_p_94 a_p_95 a_p_96 a_p_97 a_p_98 a_p_91 a_p_92 a_p_93 a_p_33 a_p_80 a_p_79 a_p_78 {
	local lbl : variable label `pvar'
	local lbl : subinstr local lbl "Price of" ""
	local lbl : subinstr local lbl "-Kg" ""
	local lbl : subinstr local lbl " " ""
	label variable `pvar' "`lbl'"
}

* Single wheat-flour and rice prices as the row mean of their two price columns
egen wheat_flour=rowmean(a_p_94 a_p_95)
egen rice=rowmean(a_p_96 a_p_97)

* Hand-set labels (these become the variable names in the rename loop below)
label variable a_p_3 "Purchased_nan"
label variable a_p_5 "Maize"
label variable a_p_8 "Chick_peas"
label variable a_p_10 "Pasta"
label variable a_p_18 "Dried_meat"
label variable a_p_21 "Milk_fresh"
label variable a_p_22 "Milk_powdered"
label variable a_p_24 "Curd"
label variable a_p_25 "Krut"
label variable a_p_30 "Egg"
label variable a_p_32 "Vegetable_oil"
label variable a_p_34 "Other_oil"
label variable a_p_36 "Sweet_potato"
label variable a_p_44 "Pumpkin"
label variable a_p_51 "Fresh_pepper"
label variable a_p_52 "Wild_leaves"
label variable a_p_55 "Dried_tomato"
label variable a_p_56 "Dried_vegetables"
label variable a_p_57 "Pickled_vegetables"
label variable a_p_58 "Green_beans"
label variable a_p_62 "Melon"
label variable a_p_64 "Dried_apricot"
label variable a_p_65 "Orange_citrus"
label variable a_p_71 "Fresh_mulberries"
label variable a_p_72 "Dried_mulberries"
label variable a_p_33 "Animal_fat"
label variable a_p_79 "Brown_sugar"
label variable a_p_78 "White_sugar"


* Remove the columns that are not carried forward (incl. the components of the
* wheat-flour and rice means)
drop a_x_1 a_x_2 a_p_93 a_p_92 a_p_91 a_p_90 a_p_89 a_p_88 a_p_87 a_p_86 a_p_84 a_p_83 a_p_82 a_p_101 a_p_100 a_p_99 a_p_98 a_p_97 a_p_96 a_p_95 a_p_94 a_p_81

* Rename each remaining price column to its label, in lower case
foreach pvar of varlist a_p_3-a_p_78  {
	local newname : variable label `pvar'
	rename `pvar' `newname'
	rename `newname', lower
}



* District-level mean price per item
collapse (mean) purchased_nan-rice , by(districtc)

rename districtc NRVA_District_Code

save "stata\processed\NRVA_Prices_2007.dta", replace

* 2007 household consumption: attach district prices, name the quantity
* variables after their labels, value each item at district prices (falling back
* to the province and then the national median price) and store the weekly total.
use "stata\processed\NRVAData_2007.dta", clear

* Districts that appear only in the price file (_merge==2) are not kept
merge m:1 NRVA_District_Code using "stata\processed\NRVA_Prices_2007"
drop if _merge==2
drop _merge


* Quantity variables handled below. The same list is used for relabelling,
* zero-filling and renaming so the three steps cannot drift apart.
local qvars q_15_6_2 q_15_6_1 q_15_6_4 q_15_6_5 q_15_6_6 q_15_6_7 q_15_6_8 q_15_6_9 q_15_6_32 q_15_6_27 ///
q_15_6_21 q_15_6_26 q_15_6_23 q_15_6_78 q_15_6_79 q_15_6_80 q_15_6_55 q_15_6_56 q_15_6_70 q_15_6_71 q_15_6_72 ///
q_15_6_74 q_15_6_75 q_15_6_76 q_15_6_10 q_15_6_12 q_15_6_14 q_15_6_15 q_15_6_18 q_15_6_17 q_15_6_16 q_15_6_19 ///
q_15_6_28 q_15_6_33 q_15_6_25 q_15_6_29 q_15_6_35 q_15_6_36 q_15_6_37 q_15_6_38 q_15_6_39 q_15_6_40 q_15_6_41 q_15_6_42 ///
q_15_6_43 q_15_6_44 q_15_6_45 q_15_6_47 q_15_6_46 q_15_6_48 q_15_6_49 q_15_6_50 q_15_6_51 q_15_6_52 ///
q_15_6_60 q_15_6_61 q_15_6_62 q_15_6_63 q_15_6_64 q_15_6_65 q_15_6_67 q_15_6_66 q_15_6_68 q_15_6_69 q_15_6_3 q_15_6_30 ///
q_15_6_13 q_15_6_22 q_15_6_24 q_15_6_57 q_15_6_58 q_15_6_73

* Turn "Amount of <item>" labels into "amount_<item>". subinstr without "all"
* replaces only the first match, i.e. only the first remaining blank.
foreach X of varlist `qvars' {
	local variable_label : variable label `X'
	local variable_label : subinstr local variable_label "Amount of " "amount_"
	local variable_label : subinstr local variable_label " " "_"
	label variable `X' "`variable_label'"
}

* Labels set by hand
label var q_15_6_77 "amount_otherfruit"
label var q_15_6_10 "amount_pasta"
label var q_15_6_24 "amount_curd"
label var q_15_6_52 "amount_wild_leaves"


* Missing quantities are treated as zero consumption.
* (The original list contained q_15_4_10 instead of q_15_6_10 here, which left
* the pasta quantity missing and zero-filled an unrelated variable.)
foreach X of varlist `qvars' {
	replace `X'=0 if `X'==.
}


* Rename every quantity variable to its label
foreach v of varlist `qvars' {
	local x : variable label `v'
	rename `v' `x'
}

* Harmonise names with the other survey waves
rename amount_lamb amount_mutton
rename amount_milk amount_milk_fresh 
rename egg eggs	
rename orange_citrus  orange
rename q_15_6_34 amount_other_oil
rename amount_hot_pepper amount_hot_pepper
rename amount_walnut amount_walnuts
rename amount_chickpeas amount_chick_peas
rename amount_dried_vegetable amount_dried_vegetables
rename amount_pickled_vegetable amount_pickled_vegetables
rename amount_green_bean amount_green_beans
rename fresh_pepper hot_pepper
rename amount_dried_tomato amount_dried_tomatoes	
rename amount_dried_apricot  amount_dried_apricots 

* Province and national median prices, used where the district price is missing
foreach X of varlist purchased_nan- rice {
	bysort province: egen `X'_province=median(`X')
}

foreach X of varlist purchased_nan- rice {
	egen `X'_national=median(`X')
}


drop mint coriander

* Item expenditure = price * quantity (district price first, then province, then national)
foreach X of varlist barley- rice {
	gen exp`X'=`X'*amount_`X'
	replace exp`X'=`X'_province*amount_`X' if exp`X'==.
	replace exp`X'=`X'_national*amount_`X' if exp`X'==.
}

egen HHexpenditure_perweek2007=rowtotal(exp*), missing

*for 73 items

* Set the total to missing when every item quantity is missing. Full variable
* names are spelled out so the command does not depend on variable abbreviation.
* NOTE(review): most quantities were zero-filled above, so this condition can
* rarely, if ever, be true - confirm whether that is intended.
replace HHexpenditure_perweek2007=. if amount_rice==. & amount_wheat_flour==. & amount_purchased_nan==. & amount_barley==. & amount_maize==. & amount_beans==. & amount_mung==. ///
& amount_chick_peas==. & amount_lentils==. & amount_pasta==. & amount_beef==. & amount_veal==. & amount_mutton==. & amount_goat==. & amount_chicken==. & amount_liver==. & ///
amount_dried_meat==. & amount_fish==. & amount_milk_fresh==. & amount_milk_powdered==. & amount_yogurt==. & amount_curd==. & amount_krut==. & amount_dogh==. & amount_ghee==. ///
& amount_butter==. & amount_cheese==. & amount_eggs==. & amount_vegetable_oil==. & amount_animal_fat==. & amount_other_oil==. & amount_potato==. & amount_sweet_potato==. & ///
amount_onion==. & amount_tomato==. & amount_okra==. & amount_spinach==. & amount_cauliflower==. & amount_eggplant==. & amount_carrots==. & amount_pumpkin==. & amount_cucumber==. ///
& amount_radish==. & amount_turnip==. & amount_cabbage==. & amount_leek==. & amount_broccoli==. & amount_hot_pepper==. & amount_wild_leaves==. & amount_dried_tomatoes==. & ///
amount_dried_vegetables==. & amount_pickled_vegetables==. & amount_green_beans==. & amount_apple==. & amount_grapes==. & amount_melon==. & amount_peach==. & amount_dried_apricots==. ///
& amount_orange==. & amount_plum==. & amount_pomegranate==. & amount_pear==. & amount_banana==. & amount_raisins==. & amount_fresh_mulberries==. & amount_dried_mulberries==. ///
& amount_mangoes==. & amount_walnuts==. & amount_pistachio==. & amount_almonds==. & amount_white_sugar==. & amount_brown_sugar==. & amount_honey==.


gen fooddrinksoutside=q_12_1
label var fooddrinksoutside "HH expenditure for Food & drinks consumed outside the home, in current prices"


keep hhid hhweight province districtcode district year amount* broccoli HHexpenditure_perweek2007 fooddrinksoutside

rename hhid hhid2007

save "stata\processed\foodconsumption_2007.dta", replace

	
	
*---------------------------------2011 Data

* Build the 2011 district price file (one row per district, mean price per item)
use "stata\rawdata\NRVA data\NRVA 2011\Price.dta", clear

* Items recorded in two variants get one price: the row mean of both variants
egen walnuts    =rowmean(Q_3_77_Walnuts_with_shells   Q_3_78_Walnuts_without_shells)
egen pistachio  =rowmean(Q_3_79_Pistachio_with_shells Q_3_80_Pistachio_without_shells)
egen almonds    =rowmean(Q_3_81_Almonds_with_shells   Q_3_82_Almonds_without_shells)
egen wheat_flour=rowmean(Q_3_89_Wheat_flour_local     Q_3_90_Wheat_flour_imported)
egen rice       =rowmean(Q_3_91_Rice_high_quality     Q_3_92_Rice_Low_quality)

rename Q_3_56_Vegetable_cotton Q_3_56_Vegetable_oil

* The variant-specific columns are no longer needed
drop Q_3_77_Walnuts_with_shells   Q_3_78_Walnuts_without_shells   ///
     Q_3_79_Pistachio_with_shells Q_3_80_Pistachio_without_shells ///
     Q_3_81_Almonds_with_shells   Q_3_82_Almonds_without_shells   ///
     Q_3_89_Wheat_flour_local     Q_3_90_Wheat_flour_imported     ///
     Q_3_91_Rice_high_quality     Q_3_92_Rice_Low_quality

* Remove the question-number prefix and the leading underscore from the names
renpfix Q_3_*_
renpfix _

* Lower-case all item price names
foreach pricevar of varlist Purchased_nan-Gram_bread  {
	rename `pricevar', lower
}



collapse (mean) purchased_nan-gram_bread walnuts pistachio almonds wheat_flour rice, by(District_Code)

rename District_Code NRVA_District_Code

save "stata\processed\NRVA_Prices_2011.dta", replace

* 2011 household consumption: attach district prices, harmonise names, build
* district / province / national median prices, value each item (district price
* first, then province, then national) and store the weekly total.
use "stata\processed\NRVAData_2011.dta", clear

* Districts that appear only in the price file (_merge==2) are not kept
merge m:1 NRVA_District_Code using "stata\processed\NRVA_Prices_2011"
drop if _merge==2
drop _merge

renpfix Q_23_6


* Quantity variables: _<Item> -> amount_<item>
foreach X of varlist _Wheat_Flour  _Barley _Maize _Beans ///
_Mung _Chick_Peas _Lentils _Vegetable_Oil _Ghee _Milk_Fresh _Dogh _Yogurt _White_Sugar ///
_Brown_Sugar _Honey _Dried_Tomatoes _Dried_Vegetables _Raisins _Fresh_Mulberries _Dried_Mulberries ///
_Walnuts _Pistachio _Almonds _Pasta _Beef _Mutton _Goat _Dried_Meat _Liver _Chicken _Fish ///
_Butter  _Animal_Fat _Krut _Cheese _Potato _Sweet_Potato _Onion _Tomato _Okra _Spinach _Cauliflower ///
_Eggplant _Carrots _Pumpkin _Cucumber _Turnip _Radish _Cabbage _Leek  _Hot_Pepper ///
_Wild_Leafy _Apple _Grapes _Melon _Peach _Fresh_Apricots _Dried_Apricots _Orange ///
_Pomegranate _Plum _Pear _Banana _Purchased_Nan _Eggs _Veal _Milk_Powdered _Curd _Pickled_Vegetables ///
_Green_Beans _Mangoes _Other_Oils {
	rename `X' amount`X'
	rename amount`X', lower
}

* Rice quantity is the row mean of all _Rice* variables
egen amount_rice=rowmean(_Rice*)


rename amount_wild_leafy amount_wild_leaves
	
* Price variables: harmonise names with the quantity variables
rename egg eggs
rename fresh_pepper hot_pepper 
rename other_oil other_oils
rename milk_powder milk_powdered

* Food items (= price variable names) valued below. Defined once and reused so
* the four loops always run over the same items in the same order.
local items2011 wheat_flour rice barley maize beans  ///
mung chick_peas lentils vegetable_oil ghee milk_fresh dogh yogurt white_sugar ///
brown_sugar honey dried_tomatoes dried_vegetables raisins fresh_mulberries dried_mulberries ///
walnuts pistachio almonds pasta beef mutton goat dried_meat liver chicken fish ///
butter  animal_fat krut cheese potato sweet_potato onion tomato okra spinach cauliflower  ///
eggplant carrots pumpkin cucumber turnip radish cabbage leek  hot_pepper ///
wild_leaves apple grapes melon peach fresh_apricots dried_apricots orange  ///
pomegranate plum pear banana purchased_nan eggs veal milk_powdered curd pickled_vegetables ///
green_beans mangoes other_oils

* The three loops are kept separate on purpose: later code addresses the new
* variables by position (wheat_flour_d2011-other_oils_d2011 etc.), so all *_d2011
* variables must be created before the *_p2011 and *_n2011 ones.
sort districtcode
foreach X of varlist `items2011' {
	bysort districtcode: egen `X'_d2011=median(`X')
}

sort province
foreach X of varlist `items2011' {
	bysort province: egen `X'_p2011=median(`X')
}

foreach X of varlist `items2011' {
	egen `X'_n2011=median(`X')
}

*drop salt black_pepper ginger tomato_sauce mixed_spices coriander mint vegetable_cotton chocolates

* Item expenditure = price * quantity, with province / national median fallback
foreach X of varlist `items2011' {
	gen exp`X'=`X'*amount_`X'
	replace exp`X'=`X'_p2011*amount_`X' if exp`X'==.
	replace exp`X'=`X'_n2011*amount_`X' if exp`X'==.
}

egen HHexpenditure_perweek2011=rowtotal(exp*), missing
	
*for 73 items

* Set the total to missing when every item quantity is missing (full variable
* name used so the command does not depend on variable abbreviation)
replace HHexpenditure_perweek2011=. if amount_wheat_flour==. & amount_rice==. &  amount_barley==. & amount_maize==. & amount_beans==. &  ///
amount_mung==. & amount_chick_peas==. & amount_lentils==. & amount_vegetable_oil==. & amount_ghee==. & amount_milk_fresh==. & amount_dogh==. & amount_yogurt==. & amount_white_sugar==. & ///
amount_brown_sugar==. & amount_honey==. & amount_dried_tomatoes==. & amount_dried_vegetables==. & amount_raisins==. & amount_fresh_mulberries==. & amount_dried_mulberries==. & ///
amount_walnuts==. & amount_pistachio==. & amount_almonds==. & amount_pasta==. & amount_beef==. & amount_mutton==. & amount_goat==. & amount_dried_meat==. & amount_liver==. & amount_chicken==. & amount_fish==. & ///
amount_butter ==. & amount_animal_fat==. & amount_krut==. & amount_cheese==. & amount_potato==. & amount_sweet_potato==. & amount_onion==. & amount_tomato==. & amount_okra==. & amount_spinach==. & amount_cauliflower ==. & ///
amount_eggplant==. & amount_carrots==. & amount_pumpkin==. & amount_cucumber==. & amount_turnip==. & amount_radish==. & amount_cabbage==. & amount_leek==. & amount_hot_pepper==. & ///
amount_wild_leaves==. & amount_apple==. & amount_grapes==. & amount_melon==. & amount_peach==. & amount_fresh_apricots==. & amount_dried_apricots==. & amount_orange==. &  ///
amount_pomegranate==. & amount_plum==. & amount_pear==. & amount_banana==. & amount_purchased_nan==. & amount_eggs==. & amount_veal==. & amount_milk_powdered==. & amount_curd==. & amount_pickled_vegetables==. & ///
amount_green_beans==. & amount_mangoes==. & amount_other_oils==.

drop exp*

gen fooddrinksoutside=Q_10_2
label var fooddrinksoutside "HH expenditure for Food & drinks consumed outside the home, in current prices"


* fooddrinksoutside is created after HHexpenditure_perweek2011 and therefore lies
* outside the wheat_flour_d2011-HHexpenditure_perweek2011 range; it has to be
* listed explicitly, as in the 2005 and 2007 files, or it is lost for this wave.
keep hhid hhweight province districtcode district year amount* wheat_flour_d2011- HHexpenditure_perweek2011 fooddrinksoutside
rename hhid hhid2011

save "stata\processed\foodconsumption_2011.dta", replace

		

*---------------------------------Combine all

* Stack the three survey waves
use "stata\processed\foodconsumption_2005.dta", clear
append using "stata\processed\foodconsumption_2007.dta"
append using "stata\processed\foodconsumption_2011.dta"

sort year districtcode

generate panelhhid=_n /*hh are not tracked, so unique observations*/

* The 2005 and 2011 prices are only filled in rows of their own wave. Replace
* each price by its group mean (temporary M_ copy) so that every row of a
* district / province / the whole sample carries it.

* district-level prices
sort districtcode
foreach pricevar of varlist wheat_flour_d2005-brown_sugar_d2005 wheat_flour_d2011-other_oils_d2011 {
	by districtcode, sort: egen M_`pricevar'=mean(`pricevar')
	drop `pricevar'
}

* province-level prices
sort province
foreach pricevar of varlist wheat_flour_p2005-brown_sugar_p2005 wheat_flour_p2011- other_oils_p2011 {
	by province, sort: egen M_`pricevar'=mean(`pricevar')
	drop `pricevar'
}

* national prices
sort districtcode
foreach pricevar of varlist wheat_flour_n2005-brown_sugar_n2005 wheat_flour_n2011- other_oils_n2011 {
	egen M_`pricevar'=mean(`pricevar')
	drop `pricevar'
}

* Give the spread-out copies their original names back
renpfix M_


/*No overlap for
amount_milk_powdered
fresh_apricots
amount_other_oil 
amount_curd 
amount_pickled_vegetables 
amount_green_beans
amount_veal
amount_mangoes
amount_broccoli
*/

* Quantities of items that were not asked in all waves are removed
drop amount_other_oil amount_other_oils amount_curd amount_pickled_vegetables amount_green_beans amount_veal amount_mangoes amount_milk_powdered amount_broccoli


********************Calculate expenditures using 2005 prices****************


* Align the 2005 price names with the item names used for the quantities,
* for each level: d = district, p = province, n = national
foreach lvl in d p n {
	rename milk_`lvl'2005              milk_fresh_`lvl'2005
	rename sugar_`lvl'2005             white_sugar_`lvl'2005
	rename dried_tomato_`lvl'2005      dried_tomatoes_`lvl'2005
	rename dried_vegetable_`lvl'2005   dried_vegetables_`lvl'2005
	rename lamb_`lvl'2005              mutton_`lvl'2005
	rename melon_water_melon_`lvl'2005 melon_`lvl'2005
	rename dried_apricot_`lvl'2005     dried_apricots_`lvl'2005
}

*no prices in 2005 for pasta pumpkin pear banana purchased_nan

* Items with a 2005 price
local basket2005 wheat_flour rice barley maize beans  ///
mung chick_peas lentils vegetable_oil ghee milk_fresh dogh yogurt white_sugar ///
brown_sugar honey dried_tomatoes dried_vegetables raisins fresh_mulberries dried_mulberries ///
walnuts pistachio almonds beef mutton goat dried_meat liver chicken fish ///
butter  animal_fat krut cheese potato sweet_potato onion tomato okra spinach cauliflower  ///
eggplant carrots cucumber turnip radish cabbage leek hot_pepper ///
wild_leaves apple grapes melon peach dried_apricots orange  ///
pomegranate plum  eggs

capture drop exp*

* Value each quantity at the district price; use the province price and then
* the national price where the result is still missing
foreach item of local basket2005 {
	generate exp`item'=`item'_d2005*amount_`item'
	replace exp`item'=`item'_p2005*amount_`item' if exp`item'==.
	replace exp`item'=`item'_n2005*amount_`item' if exp`item'==.
}

* Items without a 2005 price are valued at 2011 prices instead
foreach item in pasta pumpkin pear banana purchased_nan {
	generate exp`item'=`item'_d2011*amount_`item'
	replace exp`item'=`item'_p2011*amount_`item' if exp`item'==.
	replace exp`item'=`item'_n2011*amount_`item' if exp`item'==.
}

* Replace the wave-specific total by the sum over the common items
drop HHexpenditure_perweek2005
egen hhexp_2005=rowtotal(exp*), missing

drop exp*



********************Calculate expenditures using 2011 prices****************

* All common items have a 2011 price
local basket2011 wheat_flour rice barley maize beans  ///
mung chick_peas lentils vegetable_oil ghee milk_fresh dogh yogurt white_sugar ///
brown_sugar honey dried_tomatoes dried_vegetables raisins fresh_mulberries dried_mulberries ///
walnuts pistachio almonds pasta beef mutton goat dried_meat liver chicken fish ///
butter  animal_fat krut cheese potato sweet_potato onion tomato okra spinach cauliflower  ///
eggplant carrots pumpkin cucumber turnip radish cabbage leek hot_pepper ///
wild_leaves apple grapes melon peach dried_apricots orange  ///
pomegranate plum pear banana purchased_nan eggs

foreach item of local basket2011 {
	generate expamount`item'=`item'_d2011*amount_`item'
	replace expamount`item'=`item'_p2011*amount_`item' if expamount`item'==.
	replace expamount`item'=`item'_n2011*amount_`item' if expamount`item'==.
}

drop HHexpenditure_perweek2011
egen hhexp_2011=rowtotal(exp*), missing



****USE PRICE DATA OF 2005 or 2011 TO GET EXPENDITURES IN CONSTANT VALUES
**************************************************************	
******Create Paasche price index for spatial differences******

* Laspeyres: P_i=(sum over j Q_kj*P_ij)/(sum over j Q_kj*P_kj)
* Paasche: P_i=(sum over j Q_ij*P_ij)/(sum over j Q_ij*P_kj)
* where k  is the base region,  i  indexes every other region, j  indexes each item in the consumption basket, and Q  and  P  are quantities and prices

* Reference quantities for the two indices, one pair of variables per item:
*   damount_<item> = median quantity within the district (all rows of the district)
*   namount_<item> = median quantity over all rows of the combined dataset
* Neither median is split by survey year.

foreach X of varlist amount_wheat_flour amount_rice amount_barley amount_maize amount_beans ///
amount_mung amount_chick_peas amount_lentils amount_vegetable_oil amount_ghee amount_milk_fresh amount_dogh amount_yogurt amount_white_sugar ///
amount_brown_sugar amount_honey amount_dried_tomatoes amount_dried_vegetables amount_raisins amount_fresh_mulberries amount_dried_mulberries ///
amount_walnuts amount_pistachio amount_almonds amount_pasta amount_beef amount_mutton amount_goat amount_dried_meat amount_liver amount_chicken amount_fish ///
amount_butter  amount_animal_fat amount_krut amount_cheese amount_potato amount_sweet_potato amount_onion amount_tomato amount_okra amount_spinach amount_cauliflower ///
amount_eggplant amount_carrots amount_pumpkin amount_cucumber amount_turnip amount_radish amount_cabbage amount_leek amount_hot_pepper ///
amount_wild_leaves amount_apple amount_grapes amount_melon amount_peach amount_dried_apricots amount_orange ///
amount_pomegranate amount_plum amount_pear amount_banana amount_purchased_nan amount_eggs  {
	sort districtcode
	* `X' already starts with "amount_", so the new names are damount_* / namount_*
	bysort districtcode: egen d`X'=median(`X')
	egen n`X'=median(`X')
}



****Paasche and Laspeyres for 2011
* Per item, four cost terms are built and then summed over items:
*   pdisexp = district basket (damount_*) at district prices   -> Paasche numerator
*   pnatexp = district basket (damount_*) at national prices   -> Paasche denominator
*   ldisexp = national basket (namount_*) at district prices   -> Laspeyres numerator
*   lnatexp = national basket (namount_*) at national prices   -> Laspeyres denominator
* Where the district price is missing, the province price is used instead. The
* fallback keeps the same basket quantity as the primary line (the Laspeyres
* fallback previously used the household quantity amount_*, which made the
* index vary across households within a district).
* All terms are plain row-wise products, so no by-group prefix is required.
local idx2011 wheat_flour rice barley maize beans  ///
mung chick_peas lentils vegetable_oil ghee milk_fresh dogh yogurt white_sugar ///
brown_sugar honey dried_tomatoes dried_vegetables raisins fresh_mulberries dried_mulberries ///
walnuts pistachio almonds pasta beef mutton goat dried_meat liver chicken fish ///
butter  animal_fat krut cheese potato sweet_potato onion tomato okra spinach cauliflower  ///
eggplant carrots pumpkin cucumber turnip radish cabbage leek hot_pepper ///
wild_leaves apple grapes melon peach dried_apricots orange  ///
pomegranate plum pear banana purchased_nan eggs

sort districtcode
foreach X of local idx2011 {
	*for Paasche
	gen pdisexp`X'=`X'_d2011*damount_`X'
	replace pdisexp`X'=`X'_p2011*damount_`X' if pdisexp`X'==.
	gen pnatexp`X'=`X'_n2011*damount_`X'
	*for Laspeyres
	gen ldisexp`X'=`X'_d2011*namount_`X'
	replace ldisexp`X'=`X'_p2011*namount_`X' if ldisexp`X'==.
	gen lnatexp`X'=`X'_n2011*namount_`X'
}

egen pnumerator=rowtotal(pdisexp*), missing
egen pdenominator=rowtotal(pnatexp*), missing
egen lnumerator=rowtotal(ldisexp*), missing
egen ldenominator=rowtotal(lnatexp*), missing
gen paasche_2011=pnumerator/pdenominator
* (variable name spelling "laypeyres" is kept because later code refers to it)
gen laypeyres_2011=lnumerator/ldenominator


drop pnumerator  pdenominator lnumerator ldenominator pdisexp* pnatexp* ldisexp* lnatexp*
	
****Paasche and Laspeyres for 2005 (prices that are missing for pasta pumpkin pear banana purchased_nan are not imputed by 2011 prices
local idx2005 wheat_flour rice barley maize beans  ///
mung chick_peas lentils vegetable_oil ghee milk_fresh dogh yogurt white_sugar ///
brown_sugar honey dried_tomatoes dried_vegetables raisins fresh_mulberries dried_mulberries ///
walnuts pistachio almonds beef mutton goat dried_meat liver chicken fish ///
butter  animal_fat krut cheese potato sweet_potato onion tomato okra spinach cauliflower  ///
eggplant carrots  cucumber turnip radish cabbage leek hot_pepper ///
wild_leaves apple grapes melon peach dried_apricots orange  ///
pomegranate plum  eggs

sort districtcode
foreach X of local idx2005 {
	*for Paasche
	gen pdisexp`X'=`X'_d2005*damount_`X'
	replace pdisexp`X'=`X'_p2005*damount_`X' if pdisexp`X'==.
	gen pnatexp`X'=`X'_n2005*damount_`X'
	*for Laspeyres
	gen ldisexp`X'=`X'_d2005*namount_`X'
	replace ldisexp`X'=`X'_p2005*namount_`X' if ldisexp`X'==.
	gen lnatexp`X'=`X'_n2005*namount_`X'
}

egen pnumerator=rowtotal(pdisexp*), missing
egen pdenominator=rowtotal(pnatexp*), missing
egen lnumerator=rowtotal(ldisexp*), missing
egen ldenominator=rowtotal(lnatexp*), missing
gen paasche_2005=pnumerator/pdenominator
gen laypeyres_2005=lnumerator/ldenominator


drop pnumerator  pdenominator lnumerator ldenominator pdisexp* pnatexp* ldisexp* lnatexp*	


* Deflate the constant-price food expenditure by the two district-level indices
gen hhexp_paasche2011=hhexp_2011/paasche_2011
gen hhexp_lasp2011=hhexp_2011/laypeyres_2011

gen hhexp_paasche2005=hhexp_2005/paasche_2005
gen hhexp_lasp2005=hhexp_2005/laypeyres_2005


* Variable labels. Typos in the label texts are corrected here ("Amount if",
* "wheat flower", "Nationak", "2005prices").
* NOTE(review): several labels below are longer than Stata's documented
* 80-character limit for variable labels, so their tail may not be stored -
* confirm and shorten if the full text matters.
label var hhexp_2011 "Household food expenditure per week, using constant 2011 prices"
label var hhexp_paasche2011 "Household food expenditure per week, using constant 2011 prices, deflated by district-level Paasch price index"
label var hhexp_lasp2011 "Household food expenditure per week, using constant 2011 prices, deflated by district-level Laspeyre's price index"
label var hhexp_2005 "Household food expenditure per week, using constant 2005 prices (imputed by 2011 prices if missing: pasta pumpkin pear banana purchased_nan)"
label var hhexp_paasche2005 "Household food expenditure per week, using constant 2005 prices, deflated by district-level Paasch price index (imputed by 2011 prices if missing: pasta pumpkin pear banana purchased_nan)"
label var hhexp_lasp2005 "Household food expenditure per week, using constant 2005 prices, deflated by district-level Laspeyre's price index (imputed by 2011 prices if missing: pasta pumpkin pear banana purchased_nan)"
label var amount_wheat_flour "Amount of wheat flour consumed by a HH per week in kg"
label var wheat_flour_d2005 "District-level median price of wheat flour per kg using 2005 prices"
label var wheat_flour_p2005 "Province-level median price of wheat flour per kg using 2005 prices"
label var wheat_flour_n2005 "National median price of wheat flour per kg using 2005 prices"
label var wheat_flour_d2011 "District-level median price of wheat flour per kg using 2011 prices"
label var wheat_flour_p2011 "Province-level median price of wheat flour per kg using 2011 prices"
label var wheat_flour_n2011 "National median price of wheat flour per kg using 2011 prices"
/*only using overlap of food items asked in all 3 waves*/
/*needs to be corrected for HHmember to get per capita expenditures or by weights following deaton & Zaidi and Ciarli et al 2010
house-hold equivalence (HHEquiv) with weights being (0.67 for head of household, 0.33 for adults/youth older than 13 and 0.2 for children ≤13)*/

* Year-specific scaling constants used to express outside-home food spending in
* the prices of a base year (presumably price-index values - confirm the source).
local cpi2005 71.1409742918197
local cpi2007 82.7748069188245
local cpi2008 108.066600010077
local cpi2011 110.201660141582
local cpi2012 118.156300027035

* For each base year: keep the value as is in the base year, rescale it by
* base/current in every other year, then multiply by 7/30 for a weekly figure.
foreach base in 2005 2011 {
	generate fooddrinksoutside`base'=fooddrinksoutside if year==`base'
	foreach yr in 2005 2007 2008 2011 2012 {
		if `yr'!=`base' {
			replace fooddrinksoutside`base'=fooddrinksoutside*(`cpi`base''/`cpi`yr'') if year==`yr'
		}
	}
	replace fooddrinksoutside`base'=fooddrinksoutside`base'/30*7

	label variable fooddrinksoutside`base' "Expenditures in `base' prices for Food and Drinks consumed outside home per week"
}


*Need to deflate by regional Paasche as well
foreach base in 2011 2005 {
	generate fooddrinksoutside_paasche`base'=fooddrinksoutside`base'/paasche_`base'
	generate fooddrinksoutside_lasp`base'=fooddrinksoutside`base'/laypeyres_`base'
}

label variable fooddrinksoutside_paasche2005 "Expenditures in 2005 prices for Food and Drinks consumed outside home per week, deflated by Paasche price index"
label variable fooddrinksoutside_lasp2005 "Expenditures in 2005 prices for Food and Drinks consumed outside home per week, deflated by Laspeyre's price index"

label variable fooddrinksoutside_paasche2011 "Expenditures in 2011 prices for Food and Drinks consumed outside home per week, deflated by Paasche price index"
label variable fooddrinksoutside_lasp2011 "Expenditures in 2011 prices for Food and Drinks consumed outside home per week, deflated by Laspeyre's price index"

****Take the sum of both
* Total = food consumed at home + food and drinks consumed outside, for the
* undeflated series and for both deflated series of each base year. With the
* "missing" option the total is missing only when both components are missing.
foreach base in 2011 2005 {
	egen hhexp_total_`base'=rowtotal(fooddrinksoutside`base' hhexp_`base'), missing
	foreach idx in paasche lasp {
		egen hhexp_total_`idx'`base'=rowtotal(fooddrinksoutside_`idx'`base' hhexp_`idx'`base'), missing
	}
}




label variable hhexp_total_2011 "Household food expenditure per week (including food&drinks outside), using constant 2011 prices"
label variable hhexp_total_paasche2011 "Household food expenditure per week(including food&drinks outside), using constant 2011 prices, deflated by district-level Paasch price index"
label variable hhexp_total_lasp2011 "Household food expenditure per week(including food&drinks outside), using constant 2011 prices, deflated by district-level Laspeyre's price index"
label variable hhexp_total_2005 "Household food expenditure per week(including food&drinks outside), using constant 2005 prices (imputed by 2011 prices if missing: pasta pumpkin pear banana purchased_nan)"
label variable hhexp_total_paasche2005 "Household food expenditure per week(including food&drinks outside), using constant 2005 prices, deflated by district-level Paasch price index (imputed by 2011 prices if missing: pasta pumpkin pear banana purchased_nan)"
label variable hhexp_total_lasp2005 "Household food expenditure per week(including food&drinks outside), using constant 2005 prices, deflated by district-level Laspeyre's price index (imputed by 2011 prices if missing: pasta pumpkin pear banana purchased_nan)"


* Keep identifiers, the wheat-flour quantity and prices, and all expenditure series
keep hhweight* hhid2005 hhid2007 hhid2011 province districtcode district year amount_wheat_flour fooddrinksoutside* wheat_flour* hhexp* 

* The current-price variable is not needed any more
drop fooddrinksoutside

* One household id across waves, taken from the wave-specific id
generate hhid=hhid2005 if year==2005
replace hhid=hhid2007 if inlist(year, 2007, 2008)
replace hhid=hhid2011 if inlist(year, 2011, 2012)

unique districtcode

save "stata\processed\hhfoodexpenditures.dta", replace



/*******************************************************************************
*                               Food consumption                               *
*******************************************************************************/


*****************************************************************************************
***************construct Food diversity index (8 food categories)***************
*"Dietary diversity is defined as the number of different foods or food groups eaten over a reference time period, not regarding the frequency of consumption."

*-----------------------------------2005 Data 

use "stata\processed\NRVAData_2005.dta", clear

* Sum of the *_15_4 item variables within each food group
* (missing only if every item of the group is missing)
egen n_mainstaple=rowtotal(Wheat_Flour_15_4 Rice_15_4 Barley_15_4 Maize_15_4 ///
	Pasta_Macaroni_15_4 Purchased_Nan_15_4), missing

egen n_pulses=rowtotal(Beans_15_4 Mung_15_4 Chick_Peas_15_4 Lentils_15_4), missing

egen n_vegetables=rowtotal(Dried_Tomato_15_4 Dried_Vegetable_15_4 Potato_15_4 Sweet_Potato_15_4 ///
	Onion_15_4 Tomato_15_4 Okra_15_4 Spinach_15_4 Cauliflower_15_4 Eggplant_15_4 Carrots_15_4 ///
	Pumpkin_15_4 Cucumber_15_4 Turnip_15_4 Radish_15_4 Cabbage_15_4 Leek_15_4 ///
	Broccoli_15_4 Hot_Pepper_15_4 Wild_Leaves_15_4), missing

egen n_fruit=rowtotal(Raisins_15_4 Fresh_Mulberries_15_4 Dried_Mulberries_15_4 Apple_15_4 ///
	Grapes_15_4 Melon_Water_Melon_15_4 Peach_15_4 Fresh_Apricot_15_4 Dried_Appricot_15_4 ///
	Orange_15_4 Pomegranate_15_4 Plum_15_4 Pear_15_4 Banana_15_4), missing

egen n_meatfish=rowtotal(Beef_15_4 Lamb_15_4 Goat_15_4 Dried_Meat_15_4 Liver_15_4 ///
	Chicken_15_4 Fish_15_4 Egg_No_15_4), missing

egen n_milk=rowtotal(Milk_15_4 Yogurt_15_4 Cheese_15_4 Dogh_15_4 Krut_15_4), missing

egen n_sugar=rowtotal(Sugar_15_4 Brown_Sugar_15_4 Honey_15_4), missing

egen n_oil=rowtotal(Vegetable_Oil_15_4 Ghee_15_4 Butter_15_4  Animal_Fat_15_4), missing

egen n_nuts=rowtotal(Walnuts_15_4 Pistachio_15_4 Almonds_15_4), missing

*spices: Coriander_15_4 Mint_15_4

* The eight categories that enter the index (n_nuts is computed above but not used here)
local foodgroups mainstaple pulses vegetables fruit meatfish milk sugar oil

* Category indicator: 1 if the group total is at least 1, 0 if it is exactly 0,
* missing otherwise
foreach grp of local foodgroups {
	generate `grp'=cond(n_`grp'>=1 & n_`grp'!=., 1, cond(n_`grp'==0, 0, .))
}

* Number of categories consumed
egen dietarydiversity=rowtotal(`foodgroups'), missing


*****************************************************************************************
************construct Food Consumption Score (8 food categories, weighted)***************

* Copies of the group totals with system missing recoded to zero
foreach grp of local foodgroups {
	generate n_`grp'_nomiss=cond(n_`grp'==., 0, n_`grp')
}
		
*multiply Food frequency of each category with weights and take the sum
*Food frequency, in this context, is defined as the frequency (in terms of days of consumption over a reference period) that a specific food item or food group is eaten at the household level.
gen fcs=.
replace fcs=2*n_mainstaple_nomiss+3*n_pulses_nomiss+n_vegetables_nomiss+n_fruit_nomiss+4*n_meatfish_nomiss+4*n_milk_nomiss+0.5*n_sugar_nomiss+0.5*n_oil_nomiss
replace fcs=. if dietarydiversity==. /*the same should be missing for FCS as well*/
*http://documents.wfp.org/stellent/groups/public/documents/manual_guide_proced/wfp197216.pdf

label var dietarydiversity "Dietary diversity: the number of different food groups eaten over the last 7 days, not regarding the frequency of consumption."
label var fcs "Food Consumption Score: weighted sum of the frequencies with which households consume foods within eight food groups over the previous week"

/*I follow D'Souza & D. Jolliffe 2012 "Rising Food Prices and Coping Strategies: Household-level Evidence from Afghanistan" and 
Wiesmann et al 2009 "Validation of the World Food Programme's Food Consumption Score and Alternative Indicators of Household Food Security"*/


*****************************************************************************************
************Calorie intake***************	
*FOOD COMPOSITION TABLE FOR USE IN THE NEAR EAST: http://www.fao.org/docrep/003/x6879e/X6879E03.htm#ch3.I.1
*http://cso.gov.af/Content/Media/Documents/CSO-WB_Tech-Report-Pov_v4(2)1162011121045651553325325.pdf
*for eggs (number) I took 70, for nan pieces I took 284 and for maize (corn) I took 726 from this source: 
*http://siteresources.worldbank.org/AFGHANISTANEXTN/Resources/305984-1326909014678/8376871-1334700522455/NRVA0708-Quality.pdf (download 28.12.2017)

egen allfoodmissing=rowtotal(Wheat_Flour_15_6 Rice_15_6 Barley_15_6 Maize_15_6 Beans_15_6 Mung_15_6 Chick_Peas_15_6 Lentils_15_6 Vegetable_Oil_15_6 ///
Ghee_15_6 Milk_15_6 Dogh_15_6 Yogurt_15_6 Sugar_15_6 Brown_Sugar_15_6 Honey_15_6 Dried_Tomato_15_6 Dried_Vegetable_15_6 Raisins_15_6 ///
Fresh_Mulberries_15_6 Dried_Mulberries_15_6 Walnuts_15_6 Pistachio_15_6 Almonds_15_6 Pasta_Macaroni_15_6 Beef_15_6 Lamb_15_6 Goat_15_6 ///
Dried_Meat_15_6 Liver_15_6 Chicken_15_6 Fish_15_6 Butter_15_6 Animal_Fat_15_6 Krut_15_6 Cheese_15_6 Potato_15_6 Sweet_Potato_15_6 Onion_15_6 ///
Tomato_15_6 Okra_15_6 Spinach_15_6 Cauliflower_15_6 Eggplant_15_6 Carrots_15_6 Pumpkin_15_6 Cucumber_15_6 Turnip_15_6 Radish_15_6 Cabbage_15_6 ///
Leek_15_6 Broccoli_15_6 Hot_Pepper_15_6 Wild_Leaves_15_6 Apple_15_6 Grapes_15_6 Melon_Water_Melon_15_6 Peach_15_6 Fresh_Apricot_15_6 ///
Dried_Appricot_15_6 Orange_15_6 Pomegranate_15_6 Plum_15_6 Pear_15_6 Banana_15_6 Purchased_Nan_15_6 Egg_No_15_6), missing


foreach X of varlist Wheat_Flour_15_6 Rice_15_6 Barley_15_6 Maize_15_6 Beans_15_6 Mung_15_6 Chick_Peas_15_6 Lentils_15_6 Vegetable_Oil_15_6 ///
Ghee_15_6 Milk_15_6 Dogh_15_6 Yogurt_15_6 Sugar_15_6 Brown_Sugar_15_6 Honey_15_6 Dried_Tomato_15_6 Dried_Vegetable_15_6 Raisins_15_6 ///
Fresh_Mulberries_15_6 Dried_Mulberries_15_6 Walnuts_15_6 Pistachio_15_6 Almonds_15_6 Pasta_Macaroni_15_6 Beef_15_6 Lamb_15_6 Goat_15_6 ///
Dried_Meat_15_6 Liver_15_6 Chicken_15_6 Fish_15_6 Butter_15_6 Animal_Fat_15_6 Krut_15_6 Cheese_15_6 Potato_15_6 Sweet_Potato_15_6 Onion_15_6 ///
Tomato_15_6 Okra_15_6 Spinach_15_6 Cauliflower_15_6 Eggplant_15_6 Carrots_15_6 Pumpkin_15_6 Cucumber_15_6 Turnip_15_6 Radish_15_6 Cabbage_15_6 ///
Leek_15_6 Broccoli_15_6 Hot_Pepper_15_6 Wild_Leaves_15_6 Apple_15_6 Grapes_15_6 Melon_Water_Melon_15_6 Peach_15_6 Fresh_Apricot_15_6 ///
Dried_Appricot_15_6 Orange_15_6 Pomegranate_15_6 Plum_15_6 Pear_15_6 Banana_15_6 Purchased_Nan_15_6 Egg_No_15_6 {
	replace `X'=0 if `X'==.
}
	
gen caloriesperweek=3600*Wheat_Flour_15_6 + 3630*Rice_15_6 + 3270*Barley_15_6 + 726*Maize_15_6 + 3470* Beans_15_6 ///
+ 3610* Mung_15_6 + 3570*Chick_Peas_15_6 + 3540* Lentils_15_6 + 8840* Vegetable_Oil_15_6  + 8730* Ghee_15_6 ///
+ 780*Milk_15_6 + 140* Dogh_15_6 + 590* Yogurt_15_6 + 3860* Sugar_15_6 + 3800* Brown_Sugar_15_6 + 3120* Honey_15_6 ///
+ 2590* Dried_Tomato_15_6 + 2860* Dried_Vegetable_15_6 + 2930* Raisins_15_6 + 690*Fresh_Mulberries_15_6 + 3330*Dried_Mulberries_15_6 ///
+ 2770* Walnuts_15_6 + 3330* Pistachio_15_6 + 2470* Almonds_15_6 + 3790* Pasta_Macaroni_15_6 ///
+ 1240* Beef_15_6 + 2330* Lamb_15_6 + 1570* Goat_15_6 + 5020* Dried_Meat_15_6 + 1340* Liver_15_6 + 1270* Chicken_15_6 + 430* Fish_15_6 ///
+ 6930* Butter_15_6 + 9020* Animal_Fat_15_6 + 2965* Krut_15_6 + 3100* Cheese_15_6 + 750* Potato_15_6 + 920* Sweet_Potato_15_6 + 340* Onion_15_6 + 180*Tomato_15_6 ///
+ 390* Okra_15_6 + 250* Spinach_15_6 + 150* Cauliflower_15_6 +330* Eggplant_15_6 + 370* Carrots_15_6 + 270* Pumpkin_15_6 + 170* Cucumber_15_6 +230* Turnip_15_6 ///
+ 280* Radish_15_6 + 160* Cabbage_15_6 + 440* Leek_15_6 + 200*Broccoli_15_6 + 2870* Hot_Pepper_15_6 + 190* Wild_Leaves_15_6 ///
+ 490* Apple_15_6 + 670* Grapes_15_6 + 270* Melon_Water_Melon_15_6 + 460* Peach_15_6 + 520* Fresh_Apricot_15_6 + 2960* Dried_Appricot_15_6 + 330* Orange_15_6 ///
+ 430* Pomegranate_15_6 + 460* Plum_15_6 + 560* Pear_15_6 + 580* Banana_15_6 + 284* Purchased_Nan_15_6 + 70* Egg_No_15_6

replace caloriesperweek=. if allfoodmissing==.
replace caloriesperweek=. if caloriesperweek==0 /*they could have eaten outside, and according to the questionnaire they did, but there is no way to measure those calories*/

gen caloriesperday=caloriesperweek/7

gen calories_pm=caloriesperday/HH_Mem_Resident_ate_15_1 /*somehow all kcal are given *10 */
replace How_Many_Meals_15_2=0 if How_Many_Meals_15_2==.
replace How_Many_Times_15_3=0 if How_Many_Times_15_3==.
gen adj_hhmember=HH_Mem_Resident_ate_15_1+(How_Many_Meals_15_2/(3*7))-(How_Many_Times_15_3/(3*7)) /*assuming 3 meals per day*/
replace adj_hhmember=0 if adj_hhmember<0
gen calories_pmg=caloriesperday/adj_hhmember /*somehow all kcal are given *10 */
gen foodinsecure=0 if calories_pm!=.
replace foodinsecure=1 if calories_pmg<2100
replace foodinsecure=1 if calories_pmg==. & calories_pm<2100

label var calories_pm "Daily calorie intake per household member (regular eaters within last 7 days)"
label var calories_pmg "Daily calorie intake per household member (regular eaters within last 7 days) adjusted by person-meals from guests and person-meals eaten outside"
label var foodinsecure "If daily per capita calorie intake is smaller 2100kcal (average in HH)"

rename HH_Mem_Resident_ate_15_1 hhmember_ate
label var hhmember_ate "How many household members were resident and ate at least dinner regularly in the household during the last 7 days "
label var adj_hhmember "hhmember_ate adjusted by guest meals and meals eaten outside"

*for getting adult-equivalent fraction: http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0102-311X2010001100020


************Consumption Expenditures***************
*Durables, Housing conditions, bank account, child health

*following Gollin, Kirchberger, Lagakos (2015) "Measuring Living Standards Across Space in the DevelopingWorld"
*Durables: television, car, mobile or landline 


keep hhid province districtcode district year  ///
dietarydiversity fcs calories_pmg calories_pm foodinsecure adj_hhmember hhmember_ate

rename hhid hhid2005

unique districtcode

save "stata\processed\livingstandards_2005.dta", replace



*---------------------------2007 Data 

use "stata\processed\NRVAData_2007.dta", clear


*********Food consumption
***************construct Food diversity index (8 food categories)
*"Dietary diversity is defined as the number of different foods or food groups eaten over a reference time period, not regarding the frequency of consumption."

egen n_mainstaple=rowtotal(q_15_4_2 q_15_4_3 q_15_4_4 q_15_4_5 q_15_4_10 q_15_4_11), missing

egen n_pulses=rowtotal(q_15_4_6 q_15_4_7 q_15_4_8 q_15_4_9), missing

egen n_vegetables=rowtotal(q_15_4_35 q_15_4_36 q_15_4_37 q_15_4_38 q_15_4_39 q_15_4_40 q_15_4_41 ///
q_15_4_42 q_15_4_43 q_15_4_44 q_15_4_45 q_15_4_46 q_15_4_47 q_15_4_48 q_15_4_49 q_15_4_50 q_15_4_51 ///
q_15_4_52 q_15_4_55 q_15_4_56 q_15_4_57 q_15_4_58 q_15_4_59 q_15_4_89), missing /*including tomato sauce*/

egen n_fruit=rowtotal(q_15_4_60 q_15_4_61 q_15_4_62 q_15_4_63 q_15_4_64 q_15_4_65 q_15_4_66 q_15_4_67 ///
q_15_4_68 q_15_4_69 q_15_4_70 q_15_4_71 q_15_4_72 q_15_4_73 q_15_4_77), missing

egen n_meatfish=rowtotal(q_15_4_12 q_15_4_13 q_15_4_14 q_15_4_15 q_15_4_16 q_15_4_17 q_15_4_18 q_15_4_19 q_15_4_20 q_15_4_30), missing

egen n_milk=rowtotal(q_15_4_21 q_15_4_22 q_15_4_23 q_15_4_24 q_15_4_29 q_15_4_31 q_15_4_25 q_15_4_26), missing

egen n_sugar=rowtotal(q_15_4_78 q_15_4_79 q_15_4_80), missing

egen n_oil=rowtotal(q_15_4_27 q_15_4_28 q_15_4_32 q_15_4_33 q_15_4_34), missing

egen n_nuts=rowtotal(q_15_4_74 q_15_4_75 q_15_4_76), missing

*spices: Coriander_15_4 Mint_15_4

foreach var in mainstaple pulses vegetables fruit meatfish milk sugar oil {
	gen `var'=.
	replace `var'=1 if n_`var'>=1 & n_`var'!=.
	replace `var'=0 if n_`var'==0 & n_`var'!=.
}

egen dietarydiversity=rowtotal(mainstaple pulses vegetables fruit meatfish milk sugar oil), missing
		

************construct Food Consumption Score (8 food categories, weighted)

foreach var in mainstaple pulses vegetables fruit meatfish milk sugar oil {
	gen n_`var'_nomiss=n_`var'
	replace n_`var'_nomiss=0 if n_`var'_nomiss==.
}

*multiply Food frequency of each category with weights and take the sum
*Food frequency, in this context, is defined as the frequency (in terms of days of consumption over a reference period) that a specific food item or food group is eaten at the household level.
gen fcs=.
replace fcs=2*n_mainstaple_nomiss+3*n_pulses_nomiss+n_vegetables_nomiss+n_fruit_nomiss+4*n_meatfish_nomiss+4*n_milk_nomiss+0.5*n_sugar_nomiss+0.5*n_oil_nomiss
*the weights are from Wiesmann et al 2009 who follows World Food Programme (2007, 17ff).
replace fcs=. if dietarydiversity==. /*the same should be missing for FCS as well*/

label var dietarydiversity "Dietary diversity: the number of different food groups eaten over the last 7 days, not regarding the frequency of consumption."
label var fcs "Food Consumption Score: weighted sum of the frequencies with which households consume foods within eight food groups over the previous week"

/*I follow D'Souza & D. Jolliffe 2012 "Rising Food Prices and Coping Strategies: Household-level Evidence from Afghanistan" and 
Wiesmann et al 2009 "Validation of the World Food Programme's Food Consumption Score and Alternative Indicators of Household Food Security"*/



************Calorie intake***************	
*FOOD COMPOSITION TABLE FOR USE IN THE NEAR EAST: http://www.fao.org/docrep/003/x6879e/X6879E03.htm#ch3.I.1
*http://cso.gov.af/Content/Media/Documents/CSO-WB_Tech-Report-Pov_v4(2)1162011121045651553325325.pdf
*for eggs (number) I took 70, for nan pieces I took 284 and for maize (corn) I took 726 from this source: 
*http://siteresources.worldbank.org/AFGHANISTANEXTN/Resources/305984-1326909014678/8376871-1334700522455/NRVA0708-Quality.pdf (download 28.12.2017)

egen allfoodmissing=rowtotal(q_15_6_2 q_15_6_1 q_15_6_4 q_15_6_5 q_15_6_6 q_15_6_7 q_15_6_8 q_15_6_9 q_15_6_32 q_15_6_27 ///
q_15_6_21 q_15_6_26 q_15_6_23 q_15_6_78 q_15_6_79 q_15_6_80 q_15_6_55 q_15_6_56 q_15_6_70 q_15_6_71 q_15_6_72 ///
q_15_6_74 q_15_6_75 q_15_6_76 q_15_6_10 q_15_6_12 q_15_6_14 q_15_6_15 q_15_6_18 q_15_6_17 q_15_6_16 q_15_6_19 ///
q_15_6_28 q_15_6_33 q_15_6_25 q_15_6_29 q_15_6_35 q_15_6_36 q_15_6_37 q_15_6_38 q_15_6_39 q_15_6_40 q_15_6_41 q_15_6_42 ///
q_15_6_43 q_15_6_44 q_15_6_45 q_15_6_47 q_15_6_46 q_15_6_48 q_15_6_49 q_15_6_50 q_15_6_51 q_15_6_52 ///
q_15_6_60 q_15_6_61 q_15_6_62 q_15_6_63 q_15_6_64 q_15_6_65 q_15_6_67 q_15_6_66 q_15_6_68 q_15_6_69 q_15_6_3 q_15_6_30 ///
q_15_6_13 q_15_6_22 q_15_6_24 q_15_6_57 q_15_6_58 q_15_6_73), missing

foreach X of varlist q_15_6_2 q_15_6_1 q_15_6_4 q_15_6_5 q_15_6_6 q_15_6_7 q_15_6_8 q_15_6_9 q_15_6_32 q_15_6_27 ///
q_15_6_21 q_15_6_26 q_15_6_23 q_15_6_78 q_15_6_79 q_15_6_80 q_15_6_55 q_15_6_56 q_15_6_70 q_15_6_71 q_15_6_72 ///
q_15_6_74 q_15_6_75 q_15_6_76 q_15_6_10 q_15_6_12 q_15_6_14 q_15_6_15 q_15_6_18 q_15_6_17 q_15_6_16 q_15_6_19 ///
q_15_6_28 q_15_6_33 q_15_6_25 q_15_6_29 q_15_6_35 q_15_6_36 q_15_6_37 q_15_6_38 q_15_6_39 q_15_6_40 q_15_6_41 q_15_6_42 ///
q_15_6_43 q_15_6_44 q_15_6_45 q_15_6_47 q_15_6_46 q_15_6_48 q_15_6_49 q_15_6_50 q_15_6_51 q_15_6_52 ///
q_15_6_60 q_15_6_61 q_15_6_62 q_15_6_63 q_15_6_64 q_15_6_65 q_15_6_67 q_15_6_66 q_15_6_68 q_15_6_69 q_15_6_3 q_15_6_30 ///
q_15_6_13 q_15_6_22 q_15_6_24 q_15_6_57 q_15_6_58 q_15_6_73 {
	replace `X'=0 if `X'==.
}
	
gen caloriesperweek=3600*q_15_6_2 + 3630*q_15_6_1 + 3270*q_15_6_4 + 726*q_15_6_5 + 3470* q_15_6_6 ///
+ 3610* q_15_6_7 + 3570*q_15_6_8 + 3540* q_15_6_9 + 8840* q_15_6_32  + 8730* q_15_6_27 ///
+ 780*q_15_6_21 + 140* q_15_6_26 + 590* q_15_6_23 + 3860* q_15_6_78 + 3800* q_15_6_79 + 3120* q_15_6_80 ///
+ 2590* q_15_6_55 + 2860* q_15_6_56 + 2930* q_15_6_70 + 690*q_15_6_71 + 3330*q_15_6_72 ///
+ 2770* q_15_6_74 + 3330* q_15_6_75 + 2470* q_15_6_76 + 3790* q_15_6_10 ///
+ 1240* q_15_6_12 + 2330* q_15_6_14 + 1570* q_15_6_15 + 5020* q_15_6_18 + 1340* q_15_6_17 + 1270* q_15_6_16 + 430* q_15_6_19 ///
+ 6930* q_15_6_28 + 9020* q_15_6_33 + 2965* q_15_6_25 + 3100* q_15_6_29 + 750* q_15_6_35 + 920* q_15_6_36 + 340* q_15_6_37 + 180*q_15_6_38 ///
+ 390* q_15_6_39 + 250* q_15_6_40 + 150* q_15_6_41 +330* q_15_6_42 + 370* q_15_6_43 + 270* q_15_6_44 + 170* q_15_6_45 +230* q_15_6_47 ///
+ 280* q_15_6_46 + 160* q_15_6_48 + 440* q_15_6_49 + 200*q_15_6_50 + 2870* q_15_6_51 + 190* q_15_6_52 ///
+ 490* q_15_6_60 + 670* q_15_6_61 + 270* q_15_6_62 + 460* q_15_6_63 + 2960* q_15_6_64 + 330* q_15_6_65 ///
+ 430* q_15_6_67 + 460* q_15_6_66 + 560* q_15_6_68 + 580* q_15_6_69 + 284* q_15_6_3 + 70* q_15_6_30 ///
+ 1320 *q_15_6_13 + 5070*q_15_6_22 + 1530*q_15_6_24 + 430*q_15_6_57 + 350*q_15_6_58 + 400* q_15_6_73

replace caloriesperweek=. if allfoodmissing==.
replace caloriesperweek=. if caloriesperweek==0 /*they could have eaten outside, and according to the questionnaire they did, but there is no way to measure those calories*/

*If We would like to include other - build mean values before
*for 2007/11 ginger etc and drinks are listed, but not for 2005 - so do not include them 

*I use information from CSO, only for products not listed in their I take information from FAO
/*	the items that were not available in 2005, for other there is never a value 
q_15_6_11 other bread
q_15_6_13 veal 
q_15_6_20 other meat 
q_15_6_22 milk powder
q_15_6_24 churd 
q_15_6_31 other dairy
q_15_6_34 other oil
q_15_6_57 pickled ve
q_15_6_58 green bean
q_15_6_59 other ve
q_15_6_73 mango
q_15_6_77 other fru*/
	
gen caloriesperday=caloriesperweek/7

gen calories_pm=caloriesperday/q_15_1 /*somehow all kcal are given *10 */
replace q_15_2=0 if q_15_2==.
replace q_15_3=0 if q_15_3==.
gen adj_hhmember=q_15_1+(q_15_2/(3*7))-(q_15_3/(3*7)) /*assuming 3 meals per day*/
replace adj_hhmember=0 if adj_hhmember<0
gen calories_pmg=caloriesperday/adj_hhmember /*somehow all kcal are given *10 */
gen foodinsecure=0 if calories_pm!=.
replace foodinsecure=1 if calories_pmg<2100
replace foodinsecure=1 if calories_pmg==. & calories_pm<2100

label var calories_pm "Daily calorie intake per household member (regular eaters within last 7 days)"
label var calories_pmg "Daily calorie intake per household member (regular eaters within last 7 days) adjusted by person-meals from guests and person-meals eaten outside"
label var foodinsecure "If daily per capita calorie intake is smaller 2100kcal (average in HH)"


************Consumption Expenditures***************

rename q_15_1 hhmember_ate
label var hhmember_ate "How many household members were resident and ate at least dinner regularly in the household during the last 7 days "
label var adj_hhmember "hhmember_ate adjusted by guest meals and meals eaten outside"

keep hhid province districtcode district year dietarydiversity fcs calories_pm calories_pmg foodinsecure hhmember_ate adj_hhmember
rename hhid hhid2007

unique districtcode
	
	

*********Durables, Housing conditions, bank account, child health

************Durables***************
*following Gollin, Kirchberger, Lagakos (2015) "Measuring Living Standards Across Space in the DevelopingWorld"
*Durables: television, car, mobile or landline 


save "stata\processed\livingstandards_2007.dta", replace

*-----------------------------------------2011 Data
use "stata\processed\NRVAData_2011.dta", clear


*********Food consumption
***************construct Food diversity index (8 food categories)
*"Dietary diversity is defined as the number of different foods or food groups eaten over a reference time period, not regarding the frequency of consumption."

egen n_mainstaple=rowtotal(Q_23_4_Rice_High_Quality Q_23_4_Rice_Low_Quality Q_23_4_Wheat_Flour Q_23_4_Purchased_Nan Q_23_4_Barley Q_23_4_Maize Q_23_4_Pasta Q_23_4_Other_Bread), missing

egen n_pulses=rowtotal(Q_23_4_Beans Q_23_4_Mung Q_23_4_Chick_Peas Q_23_4_Lentils), missing

egen n_vegetables=rowtotal(Q_23_4_Potato Q_23_4_Sweet_Potato Q_23_4_Onion Q_23_4_Tomato Q_23_4_Okra Q_23_4_Spinach Q_23_4_Cauliflower Q_23_4_Eggplant Q_23_4_Carrots ///
Q_23_4_Pumpkin Q_23_4_Cucumber Q_23_4_Radish Q_23_4_Turnip Q_23_4_Cabbage Q_23_4_Leek Q_23_4_Baracoli Q_23_4_Hot_Pepper Q_23_4_Wild_Leafy ///
Q_23_4_Dried_Tomatoes Q_23_4_Dried_Vegetables Q_23_4_Pickled_Vegetables Q_23_4_Green_Beans Q_23_4_Other_Vegetables Q_23_4_Tomato_Sauce), missing /*including tomato sauce*/

egen n_fruit=rowtotal(Q_23_4_Apple Q_23_4_Grapes Q_23_4_Melon Q_23_4_Peach Q_23_4_Fresh_Apricots Q_23_4_Dried_Apricots Q_23_4_Orange Q_23_4_Plum ///
Q_23_4_Pomegranate Q_23_4_Pear Q_23_4_Banana Q_23_4_Raisins Q_23_4_Fresh_Mulberries Q_23_4_Dried_Mulberries Q_23_4_Mangoes Q_23_4_Other_Fruit), missing

egen n_meatfish=rowtotal(Q_23_4_Beef Q_23_4_Veal Q_23_4_Mutton Q_23_4_Goat Q_23_4_Chicken Q_23_4_Liver Q_23_4_Dried_Meat Q_23_4_Fish Q_23_4_Other_Meat Q_23_4_Eggs), missing

egen n_milk=rowtotal(Q_23_4_Milk_Fresh Q_23_4_Milk_Powdered Q_23_4_Yogurt Q_23_4_Curd Q_23_4_Cheese Q_23_4_Other_Dairy), missing

egen n_sugar=rowtotal(Q_23_4_White_Sugar Q_23_4_Brown_Sugar Q_23_4_Honey), missing

egen n_oil=rowtotal(Q_23_4_Ghee Q_23_4_Butter Q_23_4_Vegetable_Oil Q_23_4_Animal_Fat Q_23_4_Other_Oils), missing

egen n_nuts=rowtotal(Q_23_4_Walnuts Q_23_4_Pistachio Q_23_4_Almonds), missing

*spices: Coriander_15_4 Mint_15_4

foreach var in mainstaple pulses vegetables fruit meatfish milk sugar oil {
	gen `var'=.
	replace `var'=1 if n_`var'>=1 & n_`var'!=.
	replace `var'=0 if n_`var'==0 & n_`var'!=.
}

egen dietarydiversity=rowtotal(mainstaple pulses vegetables fruit meatfish milk sugar oil), missing
		

************construct Food Consumption Score (8 food categories, weighted)***************

foreach var in mainstaple pulses vegetables fruit meatfish milk sugar oil {
gen n_`var'_nomiss=n_`var'
replace n_`var'_nomiss=0 if n_`var'_nomiss==.
}

*multiply Food frequency of each category with weights and take the sum
*Food frequency, in this context, is defined as the frequency (in terms of days of consumption over a reference period) that a specific food item or food group is eaten at the household level.
gen fcs=.
replace fcs=2*n_mainstaple_nomiss+3*n_pulses_nomiss+n_vegetables_nomiss+n_fruit_nomiss+4*n_meatfish_nomiss+4*n_milk_nomiss+0.5*n_sugar_nomiss+0.5*n_oil_nomiss
replace fcs=. if dietarydiversity==. /*the same should be missing for FCS as well*/

label var dietarydiversity "Dietary diversity: the number of different food groups eaten over the last 7 days, not regarding the frequency of consumption."
label var fcs "Food Consumption Score: weighted sum of the frequencies with which households consume foods within eight food groups over the previous week"

/*I follow D'Souza & D. Jolliffe 2012 "Rising Food Prices and Coping Strategies: Household-level Evidence from Afghanistan" and 
Wiesmann et al 2009 "Validation of the World Food Programme's Food Consumption Score and Alternative Indicators of Household Food Security"*/

	
	
************Calorie intake***************	
*FOOD COMPOSITION TABLE FOR USE IN THE NEAR EAST: http://www.fao.org/docrep/003/x6879e/X6879E03.htm#ch3.I.1
*http://cso.gov.af/Content/Media/Documents/CSO-WB_Tech-Report-Pov_v4(2)1162011121045651553325325.pdf
*for eggs (number) I took 70, for nan pieces I took 284 and for maize (corn) I took 726 from this source: 
*http://siteresources.worldbank.org/AFGHANISTANEXTN/Resources/305984-1326909014678/8376871-1334700522455/NRVA0708-Quality.pdf (download 28.12.2017)

egen allfoodmissing=rowtotal(Q_23_6_Wheat_Flour Q_23_6_Rice_High_Quality Q_23_6_Rice_Low_Quality Q_23_6_Barley Q_23_6_Maize Q_23_6_Beans ///
Q_23_6_Mung Q_23_6_Chick_Peas Q_23_6_Lentils Q_23_6_Vegetable_Oil Q_23_6_Ghee Q_23_6_Milk_Fresh Q_23_6_Dogh Q_23_6_Yogurt Q_23_6_White_Sugar ///
Q_23_6_Brown_Sugar Q_23_6_Honey Q_23_6_Dried_Tomatoes Q_23_6_Dried_Vegetables Q_23_6_Raisins Q_23_6_Fresh_Mulberries Q_23_6_Dried_Mulberries ///
Q_23_6_Walnuts Q_23_6_Pistachio Q_23_6_Almonds Q_23_6_Pasta Q_23_6_Beef Q_23_6_Mutton Q_23_6_Goat Q_23_6_Dried_Meat Q_23_6_Liver Q_23_6_Chicken Q_23_6_Fish ///
Q_23_6_Butter  Q_23_6_Animal_Fat Q_23_6_Krut Q_23_6_Cheese Q_23_6_Potato Q_23_6_Sweet_Potato Q_23_6_Onion Q_23_6_Tomato Q_23_6_Okra Q_23_6_Spinach Q_23_6_Cauliflower ///
Q_23_6_Eggplant Q_23_6_Carrots Q_23_6_Pumpkin Q_23_6_Cucumber Q_23_6_Turnip Q_23_6_Radish Q_23_6_Cabbage Q_23_6_Leek Q_23_6_Baracoli Q_23_6_Hot_Pepper ///
Q_23_6_Wild_Leafy Q_23_6_Apple Q_23_6_Grapes Q_23_6_Melon Q_23_6_Peach Q_23_6_Fresh_Apricots Q_23_6_Dried_Apricots Q_23_6_Orange ///
Q_23_6_Pomegranate Q_23_6_Plum Q_23_6_Pear Q_23_6_Banana Q_23_6_Purchased_Nan Q_23_6_Eggs Q_23_6_Veal Q_23_6_Milk_Powdered Q_23_6_Curd Q_23_6_Pickled_Vegetables ///
Q_23_6_Green_Beans Q_23_6_Mangoes), missing

foreach X of varlist Q_23_6_Wheat_Flour Q_23_6_Rice_High_Quality Q_23_6_Rice_Low_Quality Q_23_6_Barley Q_23_6_Maize Q_23_6_Beans ///
Q_23_6_Mung Q_23_6_Chick_Peas Q_23_6_Lentils Q_23_6_Vegetable_Oil Q_23_6_Ghee Q_23_6_Milk_Fresh Q_23_6_Dogh Q_23_6_Yogurt Q_23_6_White_Sugar ///
Q_23_6_Brown_Sugar Q_23_6_Honey Q_23_6_Dried_Tomatoes Q_23_6_Dried_Vegetables Q_23_6_Raisins Q_23_6_Fresh_Mulberries Q_23_6_Dried_Mulberries ///
Q_23_6_Walnuts Q_23_6_Pistachio Q_23_6_Almonds Q_23_6_Pasta Q_23_6_Beef Q_23_6_Mutton Q_23_6_Goat Q_23_6_Dried_Meat Q_23_6_Liver Q_23_6_Chicken Q_23_6_Fish ///
Q_23_6_Butter  Q_23_6_Animal_Fat Q_23_6_Krut Q_23_6_Cheese Q_23_6_Potato Q_23_6_Sweet_Potato Q_23_6_Onion Q_23_6_Tomato Q_23_6_Okra Q_23_6_Spinach Q_23_6_Cauliflower ///
Q_23_6_Eggplant Q_23_6_Carrots Q_23_6_Pumpkin Q_23_6_Cucumber Q_23_6_Turnip Q_23_6_Radish Q_23_6_Cabbage Q_23_6_Leek Q_23_6_Baracoli Q_23_6_Hot_Pepper ///
Q_23_6_Wild_Leafy Q_23_6_Apple Q_23_6_Grapes Q_23_6_Melon Q_23_6_Peach Q_23_6_Fresh_Apricots Q_23_6_Dried_Apricots Q_23_6_Orange ///
Q_23_6_Pomegranate Q_23_6_Plum Q_23_6_Pear Q_23_6_Banana Q_23_6_Purchased_Nan Q_23_6_Eggs Q_23_6_Veal Q_23_6_Milk_Powdered Q_23_6_Curd Q_23_6_Pickled_Vegetables ///
Q_23_6_Green_Beans Q_23_6_Mangoes {
replace `X'=0 if `X'==.
}

gen caloriesperweek=3600*Q_23_6_Wheat_Flour + 3630*Q_23_6_Rice_High_Quality + 3630* Q_23_6_Rice_Low_Quality+ 3270*Q_23_6_Barley + 726*Q_23_6_Maize + 3470* Q_23_6_Beans ///
+ 3610* Q_23_6_Mung + 3570*Q_23_6_Chick_Peas + 3540* Q_23_6_Lentils + 8840* Q_23_6_Vegetable_Oil  + 8730* Q_23_6_Ghee ///
+ 780*Q_23_6_Milk_Fresh + 140* Q_23_6_Dogh + 590* Q_23_6_Yogurt + 3860* Q_23_6_White_Sugar + 3800* Q_23_6_Brown_Sugar + 3120* Q_23_6_Honey ///
+ 2590* Q_23_6_Dried_Tomatoes + 2860* Q_23_6_Dried_Vegetables + 2930* Q_23_6_Raisins + 690*Q_23_6_Fresh_Mulberries + 3330*Q_23_6_Dried_Mulberries ///
+ 2770* Q_23_6_Walnuts + 3330* Q_23_6_Pistachio + 2470* Q_23_6_Almonds + 3790* Q_23_6_Pasta ///
+ 1240* Q_23_6_Beef + 2330* Q_23_6_Mutton + 1570* Q_23_6_Goat + 5020* Q_23_6_Dried_Meat + 1340* Q_23_6_Liver + 1270* Q_23_6_Chicken + 430* Q_23_6_Fish ///
+ 6930* Q_23_6_Butter + 9020* Q_23_6_Animal_Fat + 2965* Q_23_6_Krut + 3100* Q_23_6_Cheese + 750* Q_23_6_Potato + 920* Q_23_6_Sweet_Potato + 340* Q_23_6_Onion + 180*Q_23_6_Tomato ///
+ 390* Q_23_6_Okra + 250* Q_23_6_Spinach + 150* Q_23_6_Cauliflower +330* Q_23_6_Eggplant + 370* Q_23_6_Carrots + 270* Q_23_6_Pumpkin + 170* Q_23_6_Cucumber +230* Q_23_6_Turnip ///
+ 280* Q_23_6_Radish + 160* Q_23_6_Cabbage + 440* Q_23_6_Leek + 200*Q_23_6_Baracoli + 2870* Q_23_6_Hot_Pepper + 190* Q_23_6_Wild_Leafy ///
+ 490* Q_23_6_Apple + 670* Q_23_6_Grapes + 270* Q_23_6_Melon + 460* Q_23_6_Peach + 520* Q_23_6_Fresh_Apricots + 2960* Q_23_6_Dried_Apricots + 330* Q_23_6_Orange ///
+ 430* Q_23_6_Pomegranate + 460* Q_23_6_Plum + 560* Q_23_6_Pear + 580* Q_23_6_Banana + 284* Q_23_6_Purchased_Nan + 70* Q_23_6_Eggs ///
+ 1320* Q_23_6_Veal +5070*Q_23_6_Milk_Powdered + 1530*Q_23_6_Curd + 430*Q_23_6_Pickled_Vegetables+ 350*Q_23_6_Green_Beans + 400*Q_23_6_Mangoes

/*
Q_23_6_Other_Bread
Q_23_6_Veal
Q_23_6_Other_Meat
Q_23_6_Milk_Powdered
Q_23_6_Curd
Q_23_6_Other_Dairy
Q_23_6_Other_Oils
Q_23_6_Pickled_Vegetables
Q_23_6_Green_Beans
Q_23_6_Other_Vegetables
Q_23_6_Mangoes
Q_23_6_Other_Fruit*/
	
replace caloriesperweek=. if allfoodmissing==.
replace caloriesperweek=. if caloriesperweek==0 /*they could have eaten outside, and according to the questionnaire they did, but there is no way to measure those calories*/


gen caloriesperday=caloriesperweek/7

gen calories_pm=caloriesperday/Q_23_1 /*somehow all kcal are given *10 */
replace Q_23_2=0 if Q_23_2==.
replace Q_23_3=0 if Q_23_3==.
gen adj_hhmember=Q_23_1+(Q_23_2/(3*7))-(Q_23_3/(3*7)) /*assuming 3 meals per day*/
replace adj_hhmember=0 if adj_hhmember<0
gen calories_pmg=caloriesperday/adj_hhmember /*somehow all kcal are given *10 */
gen foodinsecure=0 if calories_pm!=.
replace foodinsecure=1 if calories_pmg<2100
replace foodinsecure=1 if calories_pmg==. & calories_pm<2100

label var calories_pm "Daily calorie intake per household member (regular eaters within last 7 days)"
label var calories_pmg "Daily calorie intake per household member (regular eaters within last 7 days) adjusted by person-meals from guests and person-meals eaten outside"
label var foodinsecure "If daily per capita calorie intake is smaller 2100kcal (average in HH)"

rename Q_23_1 hhmember_ate
label var hhmember_ate "How many household members were resident and ate at least dinner regularly in the household during the last 7 days "
label var adj_hhmember "hhmember_ate adjusted by guest meals and meals eaten outside"


keep hhid province districtcode district year dietarydiversity fcs calories_pm calories_pmg foodinsecure hhmember_ate adj_hhmember
rename hhid hhid2011	

unique districtcode
save "stata\processed\livingstandards_2011.dta", replace

		

*----------------------------------------Append data

use "stata\processed\livingstandards_2005.dta", clear		

append using "stata\processed\livingstandards_2007.dta"		
append using "stata\processed\livingstandards_2011.dta"	

gen hhid=hhid2005 if year==2005
replace hhid=hhid2007 if year==2007 | year==2008
replace hhid=hhid2011 if year==2011 | year==2012

unique districtcode

save "stata\processed\livingstandards.dta", replace

********************************************************************************


*----------------------------------Merge household-level datasets (all waves: 2005, 2007/08, 2011/12)


use "stata\processed\hhfoodexpenditures.dta", clear

gen wave=2005 if year==2005
replace wave=2007 if year==2007 | year==2008
replace wave=2011 if year==2011 | year==2012

*following Technical Report by CSO/World Bank - no hh equivalence scales have been used since they are not available for Afghanistan - thus just used HHsize
*need to add expenditures for food/drinks outside home



******ALL NRVA datasets******

merge 1:1 hhid year using "stata\processed\livingstandards.dta"
drop _merge

merge 1:1 hhid year using "stata\processed\covariates.dta"
drop _merge

unique districtcode


************** process some variables **************

* Per-capita expenditure: divide each household-level expenditure aggregate
* (hhexp_...) by the adjusted number of household members that regularly ate,
* and store the result in a new variable with the suffix _adj.
* NOTE: adj_hhmember is floored at 0 when it is built, and a division by zero
* yields a missing value in Stata, so such households get a missing _adj value.
foreach X in exp_2005 exp_2011 exp_paasche2011 exp_lasp2011 exp_paasche2005 exp_lasp2005 exp_total_2011 exp_total_paasche2011 exp_total_lasp2011 exp_total_2005 exp_total_paasche2005 exp_total_lasp2005 {
	gen `X'_adj=hh`X'/adj_hhmember
	* reuse the household-level label, replacing the first "Household" by the per-capita wording
	local variable_label : variable label hh`X'
	local variable_label : subinstr local variable_label "Household" "Individual (using adjusted member that regularly ate)"
	* label the newly created _adj variable explicitly; the bare stub name is not a
	* variable created here and would only resolve through variable abbreviation
	label var `X'_adj "`variable_label'"
}

gen hhweight=.
replace hhweight=hhweight2005 if wave==2005
replace hhweight=hhweight2007 if wave==2007
replace hhweight=hhweight2011 if wave==2011

save "stata\processed\finaldata_hhlevel.dta", replace

*/
	


/*******************************************************************************
*                       Commodity Prices Afghanistan                           *
*******************************************************************************/

**Source FAO: http://faostat3.fao.org/download/P/PP/E

***Commodity Prices from Afghanistan
* Build one yearly producer-price file per crop from the FAO csv exports.
* Each pass reads FAO_<crop>1991_2014.csv and writes processed/<crop>price.dta.
foreach crop in wheat barley maize potato rice {
	import delimited "FAO_`crop'1991_2014.csv", clear

	* restrict to the producer-price rows, then keep only year and price
	keep elementname year value flagd
	keep if elementname=="Producer Price (LCU/tonne)"
	keep year value

	* make sure year is numeric before saving
	destring year, replace

	* name and label the price series after the crop
	rename value `crop'_AFGprice
	label var `crop'_AFGprice "Producer Price in Afghanistan: `crop' in LCU/tonne"

	* forward slash is required here: a backslash directly before a macro would escape it
	save "processed/`crop'price.dta", replace
	clear
}


***International Prices  - monthly data
***source: http://www.imf.org/external/np/res/commod/index.aspx

* Reads the monthly IMF commodity price sheet, splits the period string into
* year and month, and collapses the four grain series to yearly means.
* Output: processed/intcommodityprice.dta (one row per year).

* clear is given explicitly so the import does not depend on memory being empty
import excel "IMF_internationalprices.xls", sheet("External") cellrange(A3:DO476) firstrow clear
	drop if CommodityDescription=="Frequency"
	rename CommodityDescription month
	keep month BarleyCanadianno1WesternBa MaizecornUSNo2Yellow Rice5percentbrokenmilledwh WheatNo1HardRedWinterord

		* month initially holds the full period string; year starts as a copy of it
		gen year=month

		* remove the four-digit year from month, leaving M followed by the month number
		forvalues i = 1980(1)2016 { 
			replace month = subinstr(month, "`i'", "",.)
		}
		* remove the month suffix from year; M2 to M12 are handled before M1 so
		* that M1 is not cut out of M10, M11 or M12
		forvalues i = 2(1)12 { 
			replace year = subinstr(year, "M`i'", "",.)
		}	
		replace year = subinstr(year, "M1", "",.)
		replace month = subinstr(month, "M", "",.)

	rename BarleyCanadianno1WesternBa barley_priceint
	rename MaizecornUSNo2Yellow maize_priceint
	rename Rice5percentbrokenmilledwh rice_priceint
	rename WheatNo1HardRedWinterord wheat_priceint

	* convert every column that was read as text into numbers
	destring year month barley_priceint maize_priceint rice_priceint wheat_priceint, replace

	sort year month
	* yearly average of the monthly prices
	collapse (mean) barley_priceint maize_priceint rice_priceint wheat_priceint , by(year)

	foreach X of varlist barley_priceint maize_priceint rice_priceint wheat_priceint {
		label var `X' "IMF international prices of `X', current USD"
	}

save "processed/intcommodityprice.dta", replace



/*******************************************************************************
*                               Ethnicity Data                                 *
*******************************************************************************/

* Ethnic connection and trafficking sheet: attach the district identifiers
* from districtnames.dta and keep only the merged content.
import excel "Ethnicities_and_Trafficking.xlsx", sheet("Sheet1") firstrow clear

* diagnostics: both identifier columns are inspected for uniqueness
foreach idvar in OBJECTID OBJECTID_1 {
	unique `idvar'
	tabulate `idvar'
}


* OBJECTID_1 is not needed for the merge
drop OBJECTID_1
merge 1:1 OBJECTID using "processed\districtnames.dta", nogenerate
drop OBJECTID PROV_34_NA DIST_34_NA DISTID PROVID Shape_Length

save "processed\ethnicconnections.dta", replace 


***********Ethnicity dataset
*** Create ethnicity dataset ***
* Builds one record per (TARGET_FID, DISTID) holding 0/1 indicators for
* ethnic border connections, Pashtuns and the four 1996 territory variables,
* plus lower-cased province and district names.
* Output: processed/Ethnicity_connections.dta

import excel "GREG_Ethnicity.xls", clear firstrow

keep DISTID TARGET_FID western_southern_border_ethnic_c General_Border_ethnic_connection Pashtuns PROV_34_NA DIST_34_NA Taliban_Territory_1996 Dschunbisch_Territory_1996 Hizb_i_Wahclat_Territory_1996 Dschamiat_Territory_1996

tab Taliban_Territory_1996, missing

* average the indicators within each cell; the names take the first non-missing value
collapse (mean) western_southern_border_ethnic_c General_Border_ethnic_connection Pashtuns Taliban_Territory_1996 Dschunbisch_Territory_1996 Hizb_i_Wahclat_Territory_1996 ///
Dschamiat_Territory_1996 (firstnm) PROV_34_NA DIST_34_NA, by( TARGET_FID DISTID)

order DISTID DIST_34_NA PROV_34_NA 

* stable keeps the existing order among rows that share a DISTID
sort DISTID, stable

tab Taliban_Territory_1996, missing
summarize Taliban_Territory_1996


*DISTID 2010 is Zanda  Jan in Herat - belongs to Taliban Territory
* DISTID 1125 is Warduj - belongs to Dschamiat_Territory_1996

rename western_southern_border_ethnic_c Border_Ethnicicty_IranPakistan
rename General_Border_ethnic_connection Border_Ethnicity_All
rename DISTID districtcode

* Any positive mean is recoded to 1.
* NOTE(review): a missing value also satisfies the condition > 0 in Stata, so
* a missing mean becomes 1 here as well - confirm that this is intended.
foreach ind of varlist Pashtuns Border_Ethnicity_All Border_Ethnicicty_IranPakistan ///
	Taliban_Territory_1996 Dschunbisch_Territory_1996 Hizb_i_Wahclat_Territory_1996 Dschamiat_Territory_1996 {
	replace `ind' = 1 if `ind' > 0
}


tab Taliban_Territory_1996, missing


ren PROV_34_NA province_alternative
ren DIST_34_NA district_alternative
replace province_alternative = ustrlower(province_alternative)
replace district_alternative = ustrlower(district_alternative)
replace district_alternative = strrtrim(district_alternative), nopromote

* The two name corrections below address rows by their position after this
* sort. Without stable, Stata orders tied observations randomly, so the
* positions would not be reproducible if any province/district pair repeats.
sort province_alternative district_alternative, stable
gen count = _n
replace district_alternative="zanda jan" if count==164
replace district_alternative="warduj" if count==24
drop count

sort districtcode

summarize Taliban_Territory_1996
tab Taliban_Territory_1996, missing

save "processed/Ethnicity_connections.dta", replace



* Ethnic group counts and Pashtun indicators (GREG and Ethnologue columns),
* matched to the district identifiers in districtnames.dta.
import excel "GREG_ethnologue_comparison.xlsx", firstrow clear
merge 1:1 OBJECTID using "processed\districtnames.dta", nogenerate

* diagnostics: uniqueness of the spreadsheet id and of the district code
foreach idvar in OBJECTID districtcode {
	unique `idvar'
	tabulate `idvar'
}

drop OBJECTID PROV_34_NA DIST_34_NA

* shorter names; G is the column that arrived under that single-letter name
rename (number_ethnicities_greg D_Pashtun_greg numberethnicitiesinethnologue G D_Pashtun_ethno) ///
       (no_ethnic_greg pashtun_greg no_ethnic_ethno no_ethnic_ethnol2 pashtun_ethno)

save "processed\ethnicgroups_greg_ethno.dta", replace 



/*******************************************************************************
*                         Ruggedness and Suitability                           *
*******************************************************************************/

* Ruggedness table exported from ArcGIS; the id column is upper-cased to
* OBJECTID, the spelling used as merge key elsewhere in this file.
import delimited "NunnPuga_ruggedness_fromarcgis.csv", clear
rename objectid, upper
save "processed\NunnPuga_ruggedness.dta", replace 


* Further distance measures and latitude/longitude from the centroid sheet,
* matched to the district identifiers in districtnames.dta.

import excel "district398_cent_raw.xls", sheet("district398_cent_raw") firstrow clear
drop OBJECTID_1
merge 1:1 OBJECTID using "processed\districtnames.dta", nogenerate

* diagnostics: uniqueness of the spreadsheet id and of the district code
foreach idvar in OBJECTID districtcode {
	unique `idvar'
	tabulate `idvar'
}

* identifiers other than districtcode are no longer needed
drop PROV_34_NA DIST_34_NA DISTID PROVID OBJECTID

save "processed\latitute_distances.dta", replace




* New suitability data from processed_soil from FAO
* Crop suitability index (classes), intermediate input level, rainfed.
* Each crop sheet is converted to a .dta file, then all five are merged onto
* the district list and given descriptive names and labels.

local rainfed_class_crops _brl _mze _rcw _whe _wpo

* step 1: one intermediate file per crop, with MEAN carrying the crop suffix
foreach crop of local rainfed_class_crops {
	import excel "siir`crop'.xls", firstrow clear
	drop OID
	rename MEAN MEAN`crop'
	save "processed/siir`crop'.dta", replace
}

* step 2: attach every crop file to the district list
use "processed\districtnames.dta", clear
foreach crop of local rainfed_class_crops {
	merge 1:1 OBJECTID using "processed/siir`crop'.dta"
	tab _merge
	drop _merge
}

* diagnostics: uniqueness of the spreadsheet id and of the district code
foreach idvar in OBJECTID districtcode {
	unique `idvar'
	tabulate `idvar'
}

drop COUNT AREA

* step 3: final variable names and labels
rename (MEAN_brl MEAN_mze MEAN_rcw MEAN_whe MEAN_wpo) ///
       (suitability_ir_barley suitability_ir_maize suitability_ir_rice suitability_ir_wheat suitability_ir_potato)

local lbl_stem "Crop suitability index (class) for current cultivated land for intermediate input level rainfed"
label variable suitability_ir_barley "`lbl_stem' barley"
label variable suitability_ir_maize "`lbl_stem' maize"
label variable suitability_ir_rice "`lbl_stem' wetland rice"
label variable suitability_ir_wheat "`lbl_stem' wheat"
label variable suitability_ir_potato "`lbl_stem' white potato"

save "processed/suitability_rainfed_classes.dta", replace


* Crop suitability index (classes), intermediate input level, irrigated.
* Same three steps as for the rainfed classes, reading the scii sheets.

local irrigated_class_crops _brl _mze _rcw _whe _wpo

* step 1: one intermediate file per crop, with MEAN carrying the crop suffix
foreach crop of local irrigated_class_crops {
	import excel "scii`crop'.xls", firstrow clear
	drop OID
	rename MEAN MEAN`crop'
	save "processed/scii`crop'.dta", replace
}

* step 2: attach every crop file to the district list
use "processed\districtnames.dta", clear
foreach crop of local irrigated_class_crops {
	merge 1:1 OBJECTID using "processed/scii`crop'.dta", nogenerate
}

* diagnostics: uniqueness of the spreadsheet id and of the district code
foreach idvar in OBJECTID districtcode {
	unique `idvar'
	tabulate `idvar'
}

drop COUNT AREA

* step 3: final variable names and labels
rename (MEAN_brl MEAN_mze MEAN_rcw MEAN_whe MEAN_wpo) ///
       (suitability_ii_barley suitability_ii_maize suitability_ii_rice suitability_ii_wheat suitability_ii_potato)

local lbl_stem "Crop suitability index (class) for current cultivated land for intermediate input level irrigated"
label variable suitability_ii_barley "`lbl_stem' barley"
label variable suitability_ii_maize "`lbl_stem' maize"
label variable suitability_ii_rice "`lbl_stem' wetland rice"
label variable suitability_ii_wheat "`lbl_stem' wheat"
label variable suitability_ii_potato "`lbl_stem' white potato"

save "processed/suitability_irrigated_classes.dta", replace



*********************************************Crop suitability index (continuous measure)
**intermediate rainfed
* Each crop sheet (sxir, four crops, no rice) is converted to a .dta file,
* merged onto the district list and given descriptive names and labels.
* Output: processed/suitability_rainfed_values.dta

foreach code in _brl _mze _whe _wpo   {
	import excel "sxir`code'.xls", firstrow clear
	rename MEAN MEAN`code'
	drop OID
	save "processed/sxir`code'.dta", replace
}

* clear is given explicitly, as in the class-based suitability blocks, so the
* load does not depend on the data in memory having just been saved
use "processed/districtnames.dta", clear


foreach code in _brl _mze _whe _wpo  {
	merge 1:1 OBJECTID using "processed/sxir`code'.dta"
	drop _merge
}

* diagnostics: OBJECTID is inspected for uniqueness
unique OBJECTID
tab OBJECTID


drop COUNT AREA

rename MEAN_brl suitability_vir_barley
rename MEAN_mze suitability_vir_maize
rename MEAN_whe suitability_vir_wheat
rename MEAN_wpo suitability_vir_potato

label var suitability_vir_barley "Crop suitability index (values) for current cultivated land for intermediate input level rainfed barley"
label var suitability_vir_maize "Crop suitability index (values) for current cultivated land for intermediate input level rainfed maize"
label var suitability_vir_wheat "Crop suitability index (values) for current cultivated land for intermediate input level rainfed wheat"
label var suitability_vir_potato "Crop suitability index (values) for current cultivated land for intermediate input level rainfed white potato"

save "processed/suitability_rainfed_values.dta", replace



**intermediate irrigated 
* Continuous suitability index, irrigated: each crop sheet (suii, four crops,
* no rice) is converted to a .dta file, merged onto the district list and
* given descriptive names and labels.
* Output: processed/suitability_irrigated_values.dta
foreach code in _brl _mze _whe _wpo   {
	import excel "suii`code'.xls", firstrow clear
	rename MEAN MEAN`code'
	drop OID
	save "processed/suii`code'.dta", replace
}

* clear is given explicitly, as in the class-based suitability blocks, so the
* load does not depend on the data in memory having just been saved
use "processed/districtnames.dta", clear


foreach code in _brl _mze _whe _wpo  {
	merge 1:1 OBJECTID using "processed/suii`code'.dta"
	drop _merge
}


drop COUNT AREA

rename MEAN_brl suitability_vii_barley
rename MEAN_mze suitability_vii_maize
rename MEAN_whe suitability_vii_wheat
rename MEAN_wpo suitability_vii_potato

label var suitability_vii_barley "Crop suitability index (values) for current cultivated land for intermediate input level irrigated barley"
label var suitability_vii_maize "Crop suitability index (values) for current cultivated land for intermediate input level irrigated maize"
label var suitability_vii_wheat "Crop suitability index (values) for current cultivated land for intermediate input level irrigated wheat"
label var suitability_vii_potato "Crop suitability index (values) for current cultivated land for intermediate input level irrigated white potato"

save "processed/suitability_irrigated_values.dta", replace



***********************************************************************************************************************
************************************************Opium Suitability******************************************************


* Opium suitability including 4 indicators: the three ES_ columns are
* renamed and matched to the district identifiers in districtnames.dta.
import excel "opium_suitability.xls", sheet("Opium_out") firstrow clear
drop FID
rename (ES_AWM ES_MIN ES_MAX) (suitability_opium suitiability_min_opium suitiability_max_opium)
merge 1:1 OBJECTID using "processed\districtnames.dta", nogenerate
* identifiers other than those kept from districtnames.dta are removed
drop OBJECTID PROV_34_NA DIST_34_NA DISTID PROVID

save "processed\opiumsuitability.dta", replace



* Opium suitability including 4 indicators - raster data: the MEAN column
* is renamed and matched to the district list.
import excel "opium.xls", sheet("opium") firstrow clear
drop OID COUNT
rename MEAN suitabilty_raster_opium
merge 1:1 OBJECTID using "processed\districtnames.dta", nogenerate
save "processed\opiumsuitability_raster.dta", replace


* Opium suitability - population weighted: the three ESp columns are renamed
* and matched to the district list; the unweighted ES_ columns are dropped.
import excel "opium_weighted.xls", firstrow clear
drop FID 
rename (ESpAWM ESpMIN ESpMAX) ///
       (suitability_weighted_opium suitability_min_weighted_opium suitability_max_weighted_opium)
merge 1:1 OBJECTID using "processed\districtnames.dta", nogenerate
drop OBJECTID PROV_34_NA DIST_34_NA DISTID PROVID ES_AWM ES_MIN ES_MAX
save "processed\opiumsuitability_weighted.dta", replace


* Opium suitability - population weighted, saved as the raster_weighted file:
* the ESRp columns and the wheat column WESRpAWM are renamed and matched to
* the district list; all other suitability columns in the sheet are dropped.
import excel "opium_suitability_weighted.xls", firstrow clear
drop FID 
rename (ESRpAWM ESRpMIN ESRpMAX WESRpAWM) ///
       (suitability_Rweighted_opium suitability_min_Rweighted_opium suitability_max_Rweighted_opium suitability_Rweighted_wheat)
merge 1:1 OBJECTID using "processed\districtnames.dta", nogenerate
drop OBJECTID PROV_34_NA DIST_34_NA DISTID PROVID ES_AWM ES_MIN ES_MAX WESRpMIN WESRpMAX ESpAWM ESpMIN ESpMAX
rename Border, lower
save "processed\opiumsuitability_raster_weighted.dta", replace



/*******************************************************************************
*                              Distance Measures                               *
*******************************************************************************/

* Route distance table (semicolon-separated): matched to the district list
* and labelled.
import delimited "distances.csv", clear delimiter(";") varnames(1)
rename objectid, upper
merge 1:1 OBJECTID using "processed\districtnames.dta"

unique OBJECTID
tabulate OBJECTID

drop _merge distid provid OBJECTID prov_34_na dist_34_na originid route_name

* travel times exist for three speed settings, each in a 2d and a 3d variant
forvalues s = 1/3 {
	label variable total_time_`s' "Time used for the 2d route calculated using Max_Speed_`s' in hours"
	label variable total_stime_`s' "Time used for the 3d route calculated using Max_Speed_`s' in hours"
}
label variable total_length "2D length of the route in meters"
label variable total_slength "3D length of the route in meters"
label variable snapping_distance "Distance to road network of the district centroid"

save "processed\routedistance.dta", replace	



/*******************************************************************************
*                               Nightlight Data                                *
*******************************************************************************/

* Nightlight panel 1995-2013. Every input file is named F<2-digit id><year>.
* Each file is stored with its year, all are merged onto a district-year
* skeleton, and years covered by two files are averaged.

local sat_years F121995 F121996 F121997 F141997 F121998 F141998 F121999 F141999 F142000 F152000 F142001 F152001 F142002 F152002 ///
	F142003 F152003 F152004 F162004 F152005 F162005 F152006 F162006 F152007 F162007 F162008 F162009 F182010 F182011 F182012 F182013

* step 1: one intermediate file per input sheet
foreach sy of local sat_years {
	import excel "`sy'.xls", firstrow clear
	* characters 4 to 7 of the file stem are the year
	gen year = substr("`sy'", 4, 4)
	destring year, replace
	rename MEAN MEAN`sy'
	drop OID
	save "processed/`sy'.dta", replace
}


* step 2: district-year skeleton, 19 rows per district covering 1995-2013
use "processed\districtnames.dta", clear
expand 19
bysort OBJECTID: gen year = 1994 + _n


* step 3: attach every file; each one only matches the rows of its own year
foreach sy of local sat_years {
	merge 1:1 OBJECTID year using "processed/`sy'.dta", nogenerate
}

* step 4: one nightlight value per district-year
gen nightlight = .

* years covered by a single file
foreach y in 1995 1996 {
	replace nightlight = MEANF12`y' if year == `y'
}
foreach y in 2008 2009 {
	replace nightlight = MEANF16`y' if year == `y'
}
forvalues y = 2010/2013 {
	replace nightlight = MEANF18`y' if year == `y'
}

* years covered by two files: row mean of both, which ignores a missing one
forvalues y = 1997/1999 {
	tempvar avg
	egen `avg' = rmean(MEANF12`y' MEANF14`y') if year == `y'
	replace nightlight = `avg' if year == `y'
	drop `avg'
}
forvalues y = 2000/2003 {
	tempvar avg
	egen `avg' = rmean(MEANF14`y' MEANF15`y') if year == `y'
	replace nightlight = `avg' if year == `y'
	drop `avg'
}
forvalues y = 2004/2007 {
	tempvar avg
	egen `avg' = rmean(MEANF15`y' MEANF16`y') if year == `y'
	replace nightlight = `avg' if year == `y'
	drop `avg'
}

drop COUNT AREA MEAN* OBJECTID
label variable nightlight "Nightlight data per district"	


save "processed\nightlight.dta", replace



/*******************************************************************************
*                            Opium Eradication Data                            *
*******************************************************************************/
			
***Data preparation***
											
*source: ESOC Afghanistan Data 2006-2009
******************************************************
* Eradication data 2006-2009: district spellings are harmonised to the keys
* in processed/temp.dta, the district codes are merged in, and the rows are
* summed to one record per district and year.
* Output: processed/erad20062009_processed.dta
import delimited using "erad.csv", clear
	
***Merge with Merge_Districtnames

rename district PossibleSpellings
replace PossibleSpellings=lower(PossibleSpellings)

rename province Province_Name
replace Province_Name=lower(Province_Name)

* Both name columns are lower case from here on, so every comparison below
* must use lower-case literals.

* spelling corrections
replace PossibleSpellings="kandahar" if PossibleSpellings=="dand" /*see geonames*/
replace PossibleSpellings="dawlatabad" if PossibleSpellings=="daw lat abad" /*see geonames*/
replace PossibleSpellings="fayzabad" if PossibleSpellings=="faizabad" /*see geonames*/
replace Province_Name="jawzjan" if Province_Name=="jaw zjan"
replace PossibleSpellings="naher-i-saraj" if PossibleSpellings=="greshk" & Province_Name=="hilmand" 
replace PossibleSpellings="lashkar gah" if PossibleSpellings=="khanashen"
replace PossibleSpellings="nili" if PossibleSpellings=="nelay" & Province_Name=="day kundi"  /*according to Wasim*/
replace PossibleSpellings="qalay-i-kah" if PossibleSpellings=="pusht koh" & Province_Name=="farah"  /*according to Wasim*/
replace PossibleSpellings="shaygal wa shiltan" if PossibleSpellings=="shegal" & Province_Name=="kunar"  /*according to Wasim*/
replace PossibleSpellings="gizab" if PossibleSpellings=="tamzan" & Province_Name=="day kundi"  /*according to Wasim*/

* district names that occur in more than one province get the province appended
replace PossibleSpellings="arghandab (kandahar)" if PossibleSpellings=="arghandab" & Province_Name=="kandahar"
replace PossibleSpellings="arghandab (zabul)" if PossibleSpellings=="arghandab" & Province_Name=="zabul"
replace PossibleSpellings="baharak (badakhshan)" if PossibleSpellings=="baharak" & Province_Name=="badakhshan"
replace PossibleSpellings="baharak (takhar)" if PossibleSpellings=="baharak" & Province_Name=="takhar"
replace PossibleSpellings="dawlatabad (balkh)" if PossibleSpellings=="dawlatabad" & Province_Name=="balkh"
replace PossibleSpellings="dawlatabad (faryab)" if PossibleSpellings=="dawlatabad" & Province_Name=="faryab"
replace PossibleSpellings="dawlatabad (balkh)" if PossibleSpellings=="dawlat abad" & Province_Name=="balkh"
replace PossibleSpellings="dawlatabad (faryab)" if PossibleSpellings=="dawlat abad" & Province_Name=="faryab"
replace PossibleSpellings="fayzabad (badakhshan)" if PossibleSpellings=="fayzabad" & Province_Name=="badakhshan"
replace PossibleSpellings="fayzabad (jawzjan)" if PossibleSpellings=="fayzabad" & Province_Name=="jawzjan"
replace PossibleSpellings="jani khel (paktya)" if PossibleSpellings=="jani khel" & Province_Name=="paktya"
replace PossibleSpellings="jani khel (paktika)" if PossibleSpellings=="jani khel" & Province_Name=="paktika"	
replace PossibleSpellings="kohistan (badakhshan)" if PossibleSpellings=="kohistan" & Province_Name=="badakhshan"
replace PossibleSpellings="kohistan (faryab)" if PossibleSpellings=="kohistan" & Province_Name=="faryab"
replace PossibleSpellings="muqur (ghazni)" if PossibleSpellings=="muqur" & Province_Name=="ghazni"
replace PossibleSpellings="muqur (badghis)" if PossibleSpellings=="muqur" & Province_Name=="badghis"		
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qarabagh" & Province_Name=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qara bagh" & Province_Name=="ghazni"
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qara bagh" & Province_Name=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qarrabagh" & Province_Name=="ghazni"
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qarrabagh" & Province_Name=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qarabagh" & Province_Name=="ghazni"
replace PossibleSpellings="reg (hilmand)" if PossibleSpellings=="reg" & Province_Name=="hilmand"
replace PossibleSpellings="reg (kandahar)" if PossibleSpellings=="reg" & Province_Name=="kandahar"
	
merge m:1 PossibleSpellings using "processed/temp.dta" 
drop if _merge== 2 
* only one not matched
drop if _merge == 1
drop _merge
drop PossibleSpellings  Province_Name dist_396_id distid398 provid398 
	
duplicates list district year
*there are duplicates in terms of district year: this is because of different erad_type
*therefore collapse sum:

*but first save labels:
* collapse discards variable labels, so each label (or the variable name when
* no label is set) is stored in a local named l<varname>
foreach v of var * {
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' "`v'"
	}
}

drop erad_type
collapse (sum) erad_claimed erad_claim_survey erad_survey erad_tot opiumcult_survey ///
erad_v erad_fields erad_villages opiumcult_post pct_opium pct_erad ///
(last) province districtcode , by(district year)

*missings are now 0!

*use saved labels
foreach v of var * {
	label var `v' "`l`v''"
}

order district districtcode province year
sort district year 
save "processed/erad20062009_processed.dta", replace
	
	
******************************************************
***Opium Eradication Data
*source: UNODC reports coded by RA from 2010-2015
******************************************************
	
* Eradication data 2010-2015 (wide, one column per variable and year):
* district spellings are harmonised to the keys in processed/temp.dta, the
* rows are summed to one record per district, and the result is reshaped to
* one record per district and year.
* Output: processed/erad20102015_processed.dta (data stay in memory)
import delimited using "erad_coded2015.csv", clear

*first drop the 2006-2009, and the years with no data 2003-2005 (this was only to provide the RA with a structure and examples to code)
drop *2003 *2004 *2005 *2006 *2007 *2008 *2009 

drop distid398 temp n _merge
	
***Merge with Merge_Districtnames

rename distname398 PossibleSpellings
replace PossibleSpellings=lower(PossibleSpellings)

rename provincename34 Province_Name
replace Province_Name=lower(Province_Name)

* Both name columns are lower case from here on, so every comparison below
* must use lower-case literals.

* spelling corrections
replace PossibleSpellings="kandahar" if PossibleSpellings=="dand" /*see geonames*/
replace PossibleSpellings="dawlatabad" if PossibleSpellings=="daw lat abad" /*see geonames*/
replace PossibleSpellings="fayzabad" if PossibleSpellings=="faizabad" /*see geonames*/
replace Province_Name="jawzjan" if Province_Name=="jaw zjan"
replace PossibleSpellings="naher-i-saraj" if PossibleSpellings=="greshk" & Province_Name=="hilmand" 
replace PossibleSpellings="lashkar gah" if PossibleSpellings=="khanashen"
replace PossibleSpellings="nili" if PossibleSpellings=="nelay" & Province_Name=="day kundi"  /*according to Wasim*/
replace PossibleSpellings="qalay-i-kah" if PossibleSpellings=="pusht koh" & Province_Name=="farah"  /*according to Wasim*/
replace PossibleSpellings="shaygal wa shiltan" if PossibleSpellings=="shegal" & Province_Name=="kunar"  /*according to Wasim*/
replace PossibleSpellings="gizab" if PossibleSpellings=="tamzan" & Province_Name=="day kundi"  /*according to Wasim*/

* district names that occur in more than one province get the province appended
replace PossibleSpellings="arghandab (kandahar)" if PossibleSpellings=="arghandab" & Province_Name=="kandahar"
replace PossibleSpellings="arghandab (zabul)" if PossibleSpellings=="arghandab" & Province_Name=="zabul"
replace PossibleSpellings="baharak (badakhshan)" if PossibleSpellings=="baharak" & Province_Name=="badakhshan"
replace PossibleSpellings="baharak (takhar)" if PossibleSpellings=="baharak" & Province_Name=="takhar"
replace PossibleSpellings="dawlatabad (balkh)" if PossibleSpellings=="dawlatabad" & Province_Name=="balkh"
replace PossibleSpellings="dawlatabad (faryab)" if PossibleSpellings=="dawlatabad" & Province_Name=="faryab"
replace PossibleSpellings="dawlatabad (balkh)" if PossibleSpellings=="dawlat abad" & Province_Name=="balkh"
replace PossibleSpellings="dawlatabad (faryab)" if PossibleSpellings=="dawlat abad" & Province_Name=="faryab"
replace PossibleSpellings="fayzabad (badakhshan)" if PossibleSpellings=="fayzabad" & Province_Name=="badakhshan"
replace PossibleSpellings="fayzabad (jawzjan)" if PossibleSpellings=="fayzabad" & Province_Name=="jawzjan"
replace PossibleSpellings="jani khel (paktya)" if PossibleSpellings=="jani khel" & Province_Name=="paktya"
replace PossibleSpellings="jani khel (paktika)" if PossibleSpellings=="jani khel" & Province_Name=="paktika"	
replace PossibleSpellings="kohistan (badakhshan)" if PossibleSpellings=="kohistan" & Province_Name=="badakhshan"
replace PossibleSpellings="kohistan (faryab)" if PossibleSpellings=="kohistan" & Province_Name=="faryab"
replace PossibleSpellings="muqur (ghazni)" if PossibleSpellings=="muqur" & Province_Name=="ghazni"
replace PossibleSpellings="muqur (badghis)" if PossibleSpellings=="muqur" & Province_Name=="badghis"		
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qarabagh" & Province_Name=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qara bagh" & Province_Name=="ghazni"
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qara bagh" & Province_Name=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qarrabagh" & Province_Name=="ghazni"
replace PossibleSpellings="qarabagh (kabul)" if PossibleSpellings=="qarrabagh" & Province_Name=="kabul"
replace PossibleSpellings="qarabagh (ghazni)" if PossibleSpellings=="qarabagh" & Province_Name=="ghazni"
replace PossibleSpellings="reg (hilmand)" if PossibleSpellings=="reg" & Province_Name=="hilmand"
replace PossibleSpellings="reg (kandahar)" if PossibleSpellings=="reg" & Province_Name=="kandahar"

merge m:1 PossibleSpellings using "processed/temp.dta"
*all matched
drop if _merge == 2
drop _merge 
drop Province_Name PossibleSpellings

duplicates list district 
duplicates tag district , gen(dup)
*br if dup == 1
*there are duplicates in terms of district: this is because of different erad_type
*therefore collapse sum:

*but first save labels:
* collapse discards variable labels, so each label (or the variable name when
* no label is set) is stored in a local named l<varname>
foreach v of var * {
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' "`v'"
	}
}

* List of variables to sum: erad_v for 2010-2015 first, then the remaining
* ten measures year by year. The order fixes the column order after collapse.
local sumvars
forvalues y = 2010/2015 {
	local sumvars `sumvars' erad_v`y'
}
forvalues y = 2010/2015 {
	foreach stub in erad_claimed erad_claim_survey erad_survey erad_tot opiumcult_survey erad_fields erad_villages opiumcult_post pct_opium pct_erad {
		local sumvars `sumvars' `stub'`y'
	}
}

collapse (sum) `sumvars' (last) province districtcode dup , by(district)

*missings are now 0!
				
*use saved labels
foreach v of var * {
	label var `v' "`l`v''"
}


* wide to long: the four-digit suffix becomes the variable year
reshape long erad_v erad_claimed erad_claim_survey erad_survey erad_tot opiumcult_survey erad_fields erad_villages opiumcult_post pct_opium pct_erad, i(district) j(year)

drop dup
order district province districtcode
sort district year

save "processed/erad20102015_processed.dta", replace

* Stack the 2006-2009 records under the 2010-2015 data in memory and build a
* single eradication series.

append using "processed\erad20062009_processed.dta"

sort district year

* Only erad_tot and erad_v are kept. The first four dropped variables exist
* only in 2006 and are not important (erad_tot is used for 2006); for the
* remaining five, erad_v is used instead.
drop erad_claimed erad_claim_survey erad_survey opiumcult_survey ///
	erad_fields erad_villages opiumcult_post pct_opium pct_erad

label variable erad_tot "eradication (ha) estimate based on ratio verified/claims (only 2006) sourceUNODC"
label variable erad_v "eradication (ha) verified (2007-2015), source UNODC"

* erad_tot is valid for 2006 only. erad_v is valid from 2007 on, except 2008:
* there is an error in the source data, the UNODC reports have the exact same
* data for 2007 and 2008.
replace erad_tot = . if year != 2006
replace erad_v = . if year <= 2006 | year == 2008

* combined series: verified figure where available, otherwise the 2006 estimate
gen eradication = cond(erad_v == ., erad_tot, erad_v)
label variable eradication "eradication (ha), verified 2007-2015, est 2006, missing 2008, source UNODC"

save "processed\erad20062015_processed.dta", replace

	
******************************************************
***Opium Cultivation Data
***source: UNODC 2015 opium survey
*https://www.unodc.org/documents/crop-monitoring/Afghanistan/_Afghan_opium_survey_2015_web.pdf
******************************************************

* Read the UNODC district table; the v* columns are renumbered into _2001, _2002, ...
import delimited "OpiumSurvey2015.csv", varnames(1) clear
ren v* _#, renumber(2001)

* The district column (misspelled "disctrict" in the CSV header) becomes the merge key.
* Lower-case it and strip the asterisk markers (" *" first, then any remaining "*").
rename disctrict PossibleSpellings
replace PossibleSpellings = subinstr(subinstr(lower(PossibleSpellings), " *", "", .), "*", "", .)

/*Delaram was initially a city in Khash Rod District in Nimruz Province, but was
administratively transferred to Farah Province in 2007 for a short time as a separate
district. It was transferred back to Nimruz Province and remained a district.*/
replace PossibleSpellings = "khash rod" if PossibleSpellings == "delaram"

rename province Province_Name
replace Province_Name = lower(Province_Name)

* District names that occur in two provinces: append the province in parentheses
replace PossibleSpellings = "arghandab (" + Province_Name + ")" if PossibleSpellings == "arghandab" & inlist(Province_Name, "kandahar", "zabul")
replace PossibleSpellings = "baharak (" + Province_Name + ")" if PossibleSpellings == "baharak" & inlist(Province_Name, "badakhshan", "takhar")
replace PossibleSpellings = "dawlatabad (" + Province_Name + ")" if PossibleSpellings == "dowlat abad" & inlist(Province_Name, "balkh", "faryab")
replace PossibleSpellings = "jani khel (" + Province_Name + ")" if PossibleSpellings == "jani khel" & inlist(Province_Name, "paktya", "paktika")
replace PossibleSpellings = "kohistan (" + Province_Name + ")" if PossibleSpellings == "kohistan" & inlist(Province_Name, "badakhshan", "faryab")
replace PossibleSpellings = "muqur (" + Province_Name + ")" if PossibleSpellings == "muqur" & inlist(Province_Name, "ghazni", "badghis")
replace PossibleSpellings = "qarabagh (" + Province_Name + ")" if PossibleSpellings == "qara bagh" & inlist(Province_Name, "ghazni", "kabul")

* Cases where the source spelling differs between the two provinces
replace PossibleSpellings = "fayzabad (badakhshan)" if PossibleSpellings == "faizabad(provincialcenter)" & Province_Name == "badakhshan"
replace PossibleSpellings = "fayzabad (jawzjan)" if PossibleSpellings == "faizabad" & Province_Name == "jawzjan"
replace PossibleSpellings = "reg (hilmand)" if PossibleSpellings == "reg-i-khan nishin" & Province_Name == "hilmand"
replace PossibleSpellings = "reg (kandahar)" if PossibleSpellings == "reg" & Province_Name == "kandahar"
replace PossibleSpellings = "ishkamish (takhar)" if PossibleSpellings == "eshkamish" & Province_Name == "takhar"
replace PossibleSpellings = "ishkashim (badakhshan)" if PossibleSpellings == "eshkashim" & Province_Name == "badakhshan"

* Manual corrections for districts that are listed under two provinces.
* The yearly columns (_2001, ...) are still strings here, hence the quoted values.
* The reshape below uses PossibleSpellings as its i() identifier, so each name
* must be unique after these drops.
* NOTE(review): the hard-coded values presumably carry over the figures reported
* on the row that is dropped afterwards - confirm against the UNODC annex.

* gizab: keep the Daykundi row, fill in 2014/2015, drop the Uruzgan row
replace _2014="148" if PossibleSpellings=="gizab" & Province_Name=="daykundi"
replace _2015="107" if PossibleSpellings=="gizab" & Province_Name=="daykundi"
drop if PossibleSpellings=="gizab" & Province_Name=="uruzgan"
drop if PossibleSpellings=="jaghatu" & Province_Name=="ghazni"
* kahmard: order matters - the Bamyan row is dropped first, then the Baghlan row
* is relabelled as Bamyan
drop if PossibleSpellings=="kahmard" & Province_Name=="bamyan"
replace Province_Name="bamyan" if Province_Name=="baghlan" & PossibleSpellings=="kahmard" 
* khash rod / delaram: the Farah row is dropped
drop if PossibleSpellings=="khash rod" & Province_Name=="farah"
* nesh: keep the Kandahar row, fill in 2002-2006, drop the Uruzgan row
replace _2002="490" if PossibleSpellings=="nesh" & Province_Name=="kandahar"
replace _2003="59" if PossibleSpellings=="nesh" & Province_Name=="kandahar"
replace _2004="426" if PossibleSpellings=="nesh" & Province_Name=="kandahar"
replace _2005="352" if PossibleSpellings=="nesh" & Province_Name=="kandahar"
replace _2006="614" if PossibleSpellings=="nesh" & Province_Name=="kandahar"
drop if PossibleSpellings=="nesh" & Province_Name=="uruzgan"
drop if PossibleSpellings=="tagab" & Province_Name=="badakhshan"
	
* Wide -> long: one row per (PossibleSpellings, year); the stub "_" holds the value
reshape long _, i(PossibleSpellings) j(year)
rename _ cultivation

* Convert the string values to numeric. Commas are turned into dots first;
* anything still non-numeric becomes missing because of -force-.
* NOTE(review): this treats "," as a decimal separator. If the CSV uses "," as a
* thousands separator (e.g. "17,022"), values would be scaled down by 1000 -
* confirm against the raw file.
replace cultivation = subinstr(cultivation, ",", ".", .)
destring cultivation, replace force

* Attach the harmonised district identifiers and keep matched rows only.
/*kohistan in Kapisa 2004 only value and not clear to which split it belongs, all the others have no values anyways - so no issue,
also for district panjshir no values given and there are already 8 districts in the panshir province as it should be the case, for ali khail also no values	*/
merge m:1 PossibleSpellings using "processed\temp.dta", keep(match) nogenerate
drop Province_Name PossibleSpellings
order district districtcode province year cultivation

* Several source spellings can map to the same districtcode. Collapse them to one
* row per districtcode-year carrying the mean of the non-missing values.
* The original only removed the 2nd row of each group, so a 3rd duplicate would
* have survived and broken the later 1:1 merge; keeping _n==1 handles any number
* of duplicates and is identical when there are at most two.
bysort districtcode year: egen mcultivation = mean(cultivation)
bysort districtcode year: keep if _n == 1
drop cultivation
rename mcultivation cultivation

label var cultivation "Opium cultivation in hectares (UNODC 2015)"

save "processed\opiumcultivation.dta", replace



******************************************************
***Opium Production Data
***source: UNODC 2015 opium survey
******************************************************	

/*Let's take for instance the 2015 Afghanistan Opium Survey (cultivation and production):  
in Annex I (p.63) there are the estimates of poppy cultivation area at district level.  
At page 30 of the same report you can find a table with the estimated yield factor by region. 
You just have to multiply the estimated district area by the yield factor of the correspondent region. 
E.G.: The district Nad Ali in Hilmand province had a poppy cultivation area of 17,022 ha; 
Hilmand is part of the Southern region (see table 16, p. 32 for reference) which in 2015 had a yield factor of 16.1 kg/ha. 
The production of Nad Ali was therefore  17,022 X 16.1 = 274 mt.*/	

***For 2002 extra data
import excel "2002_OpiumYield.xlsx", sheet("Tabelle1") firstrow clear

* "NA" marks a missing sample yield; recode it to "." so destring can convert the column
foreach y of varlist IrrigatedSampleYieldAverage RainfedSampleYieldAveragekg {
	replace `y' = "." if `y' == "NA"
	destring `y', replace
}

* Row-wise mean of irrigated and rain-fed yield (rowmean ignores missing values)
egen meanOpiumYieldkgha = rowmean(IrrigatedSampleYieldAverage RainfedSampleYieldAveragekg)
drop Source IrrigatedSampleYieldAverage RainfedSampleYieldAveragekg

* Harmonise province names and keep only provinces present in this sheet
rename Province province
replace province = lower(province)
replace province = "hilmand" if province == "helmand"
merge m:1 province using "processed\province.dta", keep(master match) nogenerate
rename Year year

* Region label for the three provinces handled here; "." for everything else
gen Region = cond(province == "badakhshan", "North-eastern", ///
             cond(province == "hilmand",    "South-western", ///
             cond(province == "nangarhar",  "Eastern", ".")))

* One row per province-year with the average yield
collapse (mean) meanOpiumYieldkgha (last) Region, by(province year)
save "processed\yield2002.dta", replace 


*process yield data
* Builds processed\yield.dta: one opium yield (kg/ha) per province and year.
import excel "2002_2003_OpiumYield.xlsx", sheet("Tabelle1") firstrow clear
* "NA" -> "." so destring yields numeric missing; rows with an empty string are dropped
replace OpiumYieldkgha="." if OpiumYieldkgha=="NA"
drop if missing(OpiumYieldkgha)
drop OpiumYieldirrigatedkgha OpiumYieldrainfedkgha
destring OpiumYieldkgha, replace
rename Year year
rename Province province
replace province=lower(province)
* spelling corrections so that province names match processed\province.dta
replace province="badghis" if province=="baghis"
replace province="daykundi" if province=="day kundi"
replace province="panjsher" if province=="panjshir"
replace province="farah" if province=="farh"
merge m:1 province using "processed\province.dta"
drop _merge

* NOTE(review): Region is not created in this block; presumably it is a column of
* the Excel sheet - confirm.
replace OpiumYieldkgha=0 if Region=="Central" & year==2007 & OpiumYieldkgha==.
replace OpiumYieldkgha=0 if Region=="Northern" & year==2009 & OpiumYieldkgha==.

* For 2009 and 2002 the Central region gets the average of the Northern, Eastern
* and North-eastern yields (following UNODC as they did it in 2013). The -sum-
* calls only display the inputs behind the hard-coded constants.
* The original spelled the last region "North - eastern" for 2009 but
* "North-eastern" for 2002, so one of the two displays matched nothing; both
* spellings are accepted here.
* NOTE(review): 2002 reuses the constants (36.2+0+34.3) that appear to stem from
* the 2009 figures - confirm they are intended for 2002 as well.
foreach yr in 2009 2002 {
	sum OpiumYieldkgha if Region=="Northern" & year==`yr'
	sum OpiumYieldkgha if Region=="Eastern" & year==`yr'
	sum OpiumYieldkgha if inlist(Region, "North-eastern", "North - eastern") & year==`yr'
	replace OpiumYieldkgha=(36.2+0+34.3)/3 if Region=="Central" & year==`yr'	/*following UNODC as they did it in 2013*/
}

drop Region
sort year province
unique province

save "processed\yield.dta", replace 



/*******************************************************************************
*                                Population Data                               *
*******************************************************************************/

***Population sheets pwpop2000 / 2005 / 2010 / 2015
* NOTE(review): the original comment called this "Nightlight Data"; the files and
* variables are named pwpop*, so this looks like population data - confirm source.

* Convert each sheet to .dta, tagging it with its year and a year-specific MEAN column
forvalues yr = 2000(5)2015 {
	import excel "pwpop`yr'.xls", firstrow clear
	gen int year = `yr'
	rename MEAN MEANpwpop`yr'
	drop OID
	save "processed/pwpop`yr'.dta", replace
}

* District skeleton: 16 rows per OBJECTID, years 2000-2015
use "processed\districtnames.dta", clear
expand 16
bysort OBJECTID: gen year = 1999 + _n

* Attach the four sheets; each one only matches rows of its own year
forvalues yr = 2000(5)2015 {
	merge 1:1 OBJECTID year using "processed/pwpop`yr'.dta", nogenerate
}

* Single population column, filled only in 2000, 2005, 2010 and 2015
gen population = .
forvalues yr = 2000(5)2015 {
	replace population = MEANpwpop`yr' if year == `yr'
}

drop MEAN* AREA COUNT OBJECTID

label var population "District-level population"

save "processed\population.dta", replace



/*******************************************************************************
*                                 SIGACTS Data                                 *
*******************************************************************************/

*********************************************************************** ISAF *******************************************************************************
*received from Austin L. Wright
/*Please cite: "Shaver, A. and A. Wright (2016). Are modern insurgencies predictable? New evidence from the afghanistan and iraq wars."

DF = direct fire attacks
IDF = indirect fire attacks
IED_Explosion = IED attack (with detonation)

DISTID corresponds to the 398 boundaries file from ESOC. 
*/

* ESOC district list: links the ESOC ids (distid) to our district identifiers via OBJECTID
import delimited "esoc.csv", clear

* Manual id corrections for two districts
replace distid=2010 if prov_34_na=="Hirat" & dist_34_na=="Zanda  Jan"
replace provid=20 if prov_34_na=="Hirat" & dist_34_na=="Zanda  Jan"

replace distid=1125 if prov_34_na=="Badakhshan" & dist_34_na=="Warduj"
replace provid=11 if prov_34_na=="Badakhshan" & dist_34_na=="Warduj"

drop if prov_34_na==""

rename objectid OBJECTID
destring OBJECTID, replace
merge m:1 OBJECTID using "processed\districtnames.dta"
drop _merge

* Name check between ESOC and our district names.
* The original used -br- (browse), which opens the interactive Data Browser in the
* middle of the do-file, and it ran before dist_34_na was lower-cased. The check
* is now a plain listing in the log, done after lower-casing.
* NOTE(review): assumes district in districtnames.dta is stored in lower case - confirm.
replace dist_34_na=lower(dist_34_na)
list OBJECTID dist_34_na district if dist_34_na != district, noobs
* only difference in names: district parun (nuristan) in province nuristan

*Codes mostly the same, but not always, so stick to my codes
rename distid distid_esoc

save "processed\esoc.dta", replace


*load SIGACTS year-district-level data

import delimited "combat_types_randomized.csv", clear

rename distid distid_esoc

* Attach our district identifiers via the ESOC id
merge m:1 distid_esoc using "processed\esoc.dta"
* Rows that exist only in esoc.dta have no SIGACTS record and therefore no year.
* Keeping them would add rows with a missing year to the district-year panel when
* this file is merged 1:1 on districtcode year later. They are dropped here, as in
* the actor-specific SIGACTS block below.
drop if _merge==2
drop _merge

drop prov_34_na dist_34_na

label var df "direct fire attacks, SIGACTs"
label var idf "indirect fire attacks, SIGACTs"
label var ied_explosion "IED attack (with detonation), SIGACTs"


save "processed\sigacts.dta", replace

	
*load actor-specific SIGACTS data
import delimited "sigacts_casualties_randomized.csv", clear

rename distid distid_esoc

* Attach our district identifiers; districts found only in esoc.dta are not kept
* (no sigacts reported for these cases)
merge m:1 distid_esoc using "processed\esoc.dta", keep(master match) nogenerate

drop prov_34_na dist_34_na

label var total_b3event "Sum of big 3 events, SIGACTs"
label var casualty_b3event "Sum of big 3 events tied to casualties, SIGACTs"
label var coal_b3involve "Sum of big 3 events involving coalition forces, SIGACTs"
label var afghan_b3involve  "Sum of big 3 events involving Afghan forces (ANA, ANP, ALP, ANCOP), SIGACTs"

/*
- there are 8 (of the 398) districts that do not have any of these types of events from 2002-2014. 
- Coalition forces are events labeled as "coalition" this could involve a range of actors but would be primarily US forces.
- Afghan includes primarily ANA, ANP, ALP, ANCOP
- for the data shared with the authors, these are total events (big three) involving coalition forces (or afghan forces). This does not contain information about whether those events are tied to casualties. 
*/

save "processed\sigacts_actor.dta", replace



/*******************************************************************************
*                                 Trafficking                                  *
*******************************************************************************/


* UNODC district sheet, keyed by OBJECTID
import excel "Districts_UNODC_Data.xlsx", sheet("Sheet1") firstrow clear

* Add our district identifiers (all rows from both sides are kept), then remove
* the GIS bookkeeping columns
merge m:1 OBJECTID using "processed\districtnames.dta", nogenerate
drop OBJECTID PROV_34_NA DIST_34_NA DISTID PROVID Shape_Length Shape_Area

save "processed\trafficking.dta", replace



/*******************************************************************************
*                                UCDP GED Data                                 *
*******************************************************************************/

* UCDP GED events for Afghanistan, matched to districts by name
import excel "ged40.xlsx", clear firstrow

keep if country == "Afghanistan"
sort year 
tab year // 1989 - 2014

* adm_2 / adm_1 hold district / province names: lower-case them and strip the
* " district" / " province" suffixes
rename (adm_2 adm_1) (PossibleSpellings Province_Name)
replace Province_Name = subinstr(lower(Province_Name), " province", "", .)
replace PossibleSpellings = subinstr(lower(PossibleSpellings), " district", "", .)

* District names that occur in two provinces: append the province in parentheses.
* Alternative source spellings are mapped to the same harmonised name.
replace PossibleSpellings = "arghandab (" + Province_Name + ")" if PossibleSpellings == "arghandab" & inlist(Province_Name, "kandahar", "zabul")
replace PossibleSpellings = "baharak (" + Province_Name + ")" if PossibleSpellings == "baharak" & inlist(Province_Name, "badakhshan", "takhar")
replace PossibleSpellings = "dawlatabad (" + Province_Name + ")" if inlist(PossibleSpellings, "dawlatabad", "dawlat abad") & inlist(Province_Name, "balkh", "faryab")
replace PossibleSpellings = "fayzabad (" + Province_Name + ")" if PossibleSpellings == "fayzabad" & inlist(Province_Name, "badakhshan", "jawzjan")
replace PossibleSpellings = "jani khel (" + Province_Name + ")" if PossibleSpellings == "jani khel" & inlist(Province_Name, "paktya", "paktika")
replace PossibleSpellings = "kohistan (" + Province_Name + ")" if PossibleSpellings == "kohistan" & inlist(Province_Name, "badakhshan", "faryab")
replace PossibleSpellings = "muqur (" + Province_Name + ")" if PossibleSpellings == "muqur" & inlist(Province_Name, "ghazni", "badghis")
replace PossibleSpellings = "qarabagh (" + Province_Name + ")" if inlist(PossibleSpellings, "qarabagh", "qara bagh", "qarrabagh") & inlist(Province_Name, "kabul", "ghazni")
replace PossibleSpellings = "reg (" + Province_Name + ")" if PossibleSpellings == "reg" & inlist(Province_Name, "hilmand", "kandahar")
replace PossibleSpellings = "ishkamish (takhar)" if PossibleSpellings == "ishkamish" & Province_Name == "takhar"
replace PossibleSpellings = "ishkashim (badakhshan)" if PossibleSpellings == "ishkashim" & Province_Name == "badakhshan"
replace PossibleSpellings = "zinda jan" if PossibleSpellings == "zanda  jan"


merge m:1 PossibleSpellings using "processed\temp.dta"

*take out non-matched to assign district via GIS (longitude & latitude)
preserve
	keep if _merge == 1
	save "processed\UCDP_GED_nonmatched.dta", replace
restore

* Keep all events (matched or not); spellings that occur only in temp.dta are removed
keep if _merge != 2
drop _merge

destring year, replace
save "processed\UCDP_GED1.dta", replace
clear

**assign the missing district names from GIS attributes
*(the file below was exported from QGIS)

* GIS-assigned districts for UCDP events
import delimited "getdistrictGED.csv"
drop x y _merge province district
drop if missing(id)

* One record is repaired by hand
replace prov_34_na = "Kunar" if id == "134330"
replace dist_34_na = "Wata Pur" if id == "134330"
drop if missing(relid) /*drops this one observation where information from line before is saved in id*/

rename dist_34_na PossibleSpellings
replace PossibleSpellings = lower(PossibleSpellings)
replace prov_34_na = lower(prov_34_na)

* District names that occur in two provinces: append the province in parentheses.
* Alternative source spellings are mapped to the same harmonised name.
replace PossibleSpellings = "arghandab (" + prov_34_na + ")" if PossibleSpellings == "arghandab" & inlist(prov_34_na, "kandahar", "zabul")
replace PossibleSpellings = "baharak (" + prov_34_na + ")" if PossibleSpellings == "baharak" & inlist(prov_34_na, "badakhshan", "takhar")
replace PossibleSpellings = "dawlatabad (" + prov_34_na + ")" if inlist(PossibleSpellings, "dawlatabad", "dawlat abad") & inlist(prov_34_na, "balkh", "faryab")
replace PossibleSpellings = "fayzabad (" + prov_34_na + ")" if PossibleSpellings == "fayzabad" & inlist(prov_34_na, "badakhshan", "jawzjan")
replace PossibleSpellings = "jani khel (paktya)" if PossibleSpellings == "jani khail" & prov_34_na == "paktya"
replace PossibleSpellings = "jani khel (paktika)" if PossibleSpellings == "jani khel" & prov_34_na == "paktika"
replace PossibleSpellings = "kohistan (" + prov_34_na + ")" if PossibleSpellings == "kohistan" & inlist(prov_34_na, "badakhshan", "faryab")
replace PossibleSpellings = "muqur (" + prov_34_na + ")" if PossibleSpellings == "muqur" & inlist(prov_34_na, "ghazni", "badghis")
replace PossibleSpellings = "qarabagh (" + prov_34_na + ")" if inlist(PossibleSpellings, "qarabagh", "qara bagh", "qarrabagh") & inlist(prov_34_na, "kabul", "ghazni")
replace PossibleSpellings = "reg (hilmand)" if PossibleSpellings == "reg" & prov_34_na == "hilmand"
replace PossibleSpellings = "reg (kandahar)" if PossibleSpellings == "registan" & prov_34_na == "kandahar"
replace PossibleSpellings = "ishkamish (takhar)" if PossibleSpellings == "ishkamish" & prov_34_na == "takhar"
replace PossibleSpellings = "ishkashim (badakhshan)" if PossibleSpellings == "ishkashim" & prov_34_na == "badakhshan"
replace PossibleSpellings = "zinda jan" if PossibleSpellings == "zanda  jan"

* Only events whose district name is found in temp.dta are kept
merge m:1 PossibleSpellings  using "processed\temp.dta", keep(match) nogenerate
gen counter = 1

* Yearly averages for 2002-2014 over the events in this file: mean best estimate
* per event and number of events
preserve
	keep if year>2001 & year<=2014
	collapse (mean) avgbrd_event=best_est (sum) total_events=counter, by(year)
	save "processed\ucdp_averages.dta", replace
restore

* Keep the event keys plus the assigned district; the suffix 1 avoids a clash
* with the name-matched variables in UCDP_GED1
keep id relid year province district districtcode
rename (province district districtcode) (province1 district1 districtcode1)

* Re-create id through -gen-
* NOTE(review): presumably done so that id has a storage type usable as a merge key - confirm
rename id oldid
gen id=oldid
drop oldid

save "processed\getdistrictGED.dta", replace

*now use UCDP_GED1 and add missing districtnames
use "processed\UCDP_GED1.dta", clear
merge 1:1 id relid year  using "processed\getdistrictGED.dta", nogenerate

* Fill district information from the GIS assignment wherever the name match left it empty
replace district = district1 if district == ""
replace districtcode = districtcode1 if districtcode == .
replace province = province1 if province == ""
drop district1 districtcode1 province1 PossibleSpellings Province_Name  where_coordinates where_description

/*for 7 observations no district found - either also no information on latitude/longitude 
or out of Afghanistan map - probably due to rounding - only two cases*/
drop if missing(districtcode)

* Convert the id and count columns to numeric
destring id active_year conflict_new_id dyad_new_id side_a_new_id side_b_new_id ///
	number_of_sources where_prec priogrid_gid event_clarity date_prec type_of_violence ///
	deaths_a deaths_b deaths_civilians deaths_unknown best_est high_est low_est, replace

***who is fighting: some summary statistics
tab type_of_violence if inrange(year, 2002, 2014)
forvalues vtype = 1/3 {
	tab dyad_name if type_of_violence == `vtype' & inrange(year, 2002, 2014)
}

/*drop deaths_a deaths_b at the end we do not use information on the two actors, we are interested in the aggregate*/

* Step 1: district-year totals of each death measure, overall and by type of violence.
* With the -missing- option a total is missing when all inputs are missing. The
* type-specific totals are only filled on rows of that type (the -if- qualifier);
* they are spread to the whole district-year in step 3.
* NOTE(review): the suffix "_onsesided" looks like a typo for "_onesided". It is
* left unchanged because the name is carried into the saved dataset.
foreach var in deaths_civilians deaths_unknown best_est high_est low_est {
	sort districtcode year
	bysort districtcode year: egen agg`var'=total(`var'), missing
	bysort districtcode year: egen agg`var'_statebased=total(`var') if type_of_violence==1 , missing
	bysort districtcode year: egen agg`var'_nonstate=total(`var') if type_of_violence==2 , missing
	bysort districtcode year: egen agg`var'_onsesided=total(`var') if type_of_violence==3 , missing
}

* Step 2: district-year totals for specific dyads.
* NOTE(review): only the first two lines use -missing-; the others return 0 rather
* than missing when all inputs are missing - confirm this difference is intended.
bysort districtcode year: egen aggbest_est_tc_onesided=total(best_est) if  type_of_violence==3  & dyad_name=="Taleban - Civilians", missing
bysort districtcode year: egen aggcivilians_est_tc_onesided=total(deaths_civilians) if  type_of_violence==3  & dyad_name=="Taleban - Civilians", missing
bysort districtcode year: egen aggbest_est_tg=total(best_est) if  type_of_violence==1 & dyad_name=="Government of Afghanistan - Taleban"
bysort districtcode year: egen aggbest_est_ng=total(best_est) if  type_of_violence==1 & dyad_name=="Government of Afghanistan - UIFSA"
bysort districtcode year: egen aggtbest_est_tg=total(deaths_b) if type_of_violence==1 & dyad_name=="Government of Afghanistan - Taleban"
bysort districtcode year: egen agggbest_est_tg=total(deaths_a) if type_of_violence==1 & dyad_name=="Government of Afghanistan - Taleban"
bysort districtcode year: egen aggbest_est_th=total(best_est) if dyad_name=="Hizb-i Islami-yi Afghanistan - Taleban"
bysort districtcode year: egen aggbest_est_other=total(best_est) if dyad_name!="Government of Afghanistan - Taleban" & dyad_name!="Taleban - Civilians"
bysort districtcode year: egen aggbest_est_USA_AQ=total(best_est) if dyad_name=="Government of United States of America - al-Qaida"

* Step 3: copy each conditional total to every row of its district-year (max over
* the group), so that any single row kept below carries all totals.
foreach var in deaths_civilians deaths_unknown best_est high_est low_est  {
	sort districtcode year
	bysort districtcode year: egen Magg`var'_statebased=max(agg`var'_statebased)
	bysort districtcode year: egen Magg`var'_nonstate=max(agg`var'_nonstate)
	bysort districtcode year: egen Magg`var'_onsesided=max(agg`var'_onsesided) 
}

foreach var in aggbest_est_tc_onesided aggcivilians_est_tc_onesided aggbest_est_tg aggtbest_est_tg agggbest_est_tg aggbest_est_ng aggbest_est_th aggbest_est_other aggbest_est_USA_AQ {
bysort districtcode year: egen M`var'=max(`var')
}

* Step 4: keep the overall totals and the spread (Magg*) versions, strip the
* "agg" / "Magg" prefixes, and reduce to one row per district-year.
keep Magg* aggdeaths_civilians aggdeaths_unknown aggbest_est agghigh_est agglow_est province districtcode district year
renpfix agg
renpfix Magg
sort districtcode year
bysort districtcode year: gen n=_n
keep if n==1

* Declare the district-year panel
xtset, clear
xtset districtcode year
drop n

* Variable labels for the dyad-specific totals (BRD = battle-related deaths).
* Two label typos are corrected: "of of" and "overnment".
label var best_est_tc_onesided "BRD, one-sided violence, Taleban - Civilians"
label var civilians_est_tc_onesided "Civilian deaths, one-sided violence, Taleban - Civilians"
label var best_est_tg "BRD, statebased violence, Government-Taleban"
label var best_est_ng "BRD, statebased violence, Government-UIFSA (The Northern Alliance)"
label var tbest_est_tg "BRD of Taleban, statebased violence, Government-Taleban"
label var gbest_est_tg "BRD of Government forces, statebased violence, Government-Taleban"
label var best_est_other "BRD of fights between other groups, i.e. not Taliban or Afghan Government"
label var best_est_th "BRD of fights between Taliban and Hizb-i Islami-yi Afghanistan"
label var best_est_USA_AQ "BRD of fights between Government of United States of America - al-Qaida"

* Quick checks of panel coverage before saving
unique districtcode
tab districtcode year

save "processed\UCDP_GED.dta", replace



/*******************************************************************************
*                                   VHI Data                                   *
*******************************************************************************/

* Source: Kienberger

* Convert each yearly VHI sheet to .dta, tagging it with its year and a
* year-specific value column vhi_<year>
forvalues yr = 2001/2014 {
	import excel "VHI_`yr'.xls", firstrow clear
	gen int year = `yr'
	rename MEAN vhi_`yr'
	drop OID
	save "processed\VHI_`yr'.dta", replace
}


* District skeleton: 14 rows per OBJECTID, years 2001-2014
use "processed\districtnames.dta", clear
expand 14
bysort OBJECTID: gen year = 2000 + _n

* Attach the yearly files; each one only matches rows of its own year
forvalues yr = 2001/2014 {
	merge 1:1 OBJECTID year using "processed/VHI_`yr'.dta", nogenerate
}

* Collect the year-specific columns into a single vhi variable
gen vhi = .
forvalues yr = 2001/2014 {
	replace vhi = vhi_`yr' if year == `yr'
}

drop COUNT AREA vhi_* OBJECTID  province
label var vhi "VHI per district"	

save "processed\vhi.dta", replace

unique districtcode

***VHI Data March- September
* Source: Kienberger

* Convert each yearly March-September VHI sheet to .dta, tagging it with its year
* and a year-specific value column vhi_<year>
forvalues yr = 2001/2014 {
	import excel "VHI_`yr'_ms.xls", firstrow clear
	gen int year = `yr'
	rename MEAN vhi_`yr'
	drop OID
	save "processed\VHI_`yr'_ms.dta", replace
}


* District skeleton: 14 rows per OBJECTID, years 2001-2014
use "processed\districtnames.dta", clear
expand 14
bysort OBJECTID: gen year = 2000 + _n

* Attach the yearly files; each one only matches rows of its own year
forvalues yr = 2001/2014 {
	merge 1:1 OBJECTID year using "processed/VHI_`yr'_ms.dta", nogenerate
}

* Collect the year-specific columns into a single vhi_ms variable
gen vhi_ms = .
forvalues yr = 2001/2014 {
	replace vhi_ms = vhi_`yr' if year == `yr'
}

* vhi_2* (not vhi_*) so that vhi_ms itself is kept
drop COUNT AREA vhi_2* OBJECTID  province
label var vhi_ms "VHI per district, 03-09"	

save "processed\vhi_ms.dta", replace



/*******************************************************************************
*     Creating a balanced dataset to have a starting dataset for merging       *
*******************************************************************************/

* Skeleton panel: one row per district and year, 1989-2015
use "processed\districtnames.dta", clear	
drop PossibleSpellings

* Uniqueness checks (display only)
unique district_alternative province_alternative
unique district
unique districtcode

/*398 remain, also for districtcodes*/
duplicates drop district, force

/*from 1989 - 2015*/
expand 27
sort district
bysort district: gen year = 1988 + _n

* Declare the panel structure
xtset, clear
xtset districtcode year

save "processed\balanceddataset1989_2015.dta", replace
	


/*******************************************************************************
*                               Merge all data                                 *
*******************************************************************************/

* Every merge below starts from the balanced district-year skeleton.
use "processed\balanceddataset1989_2015.dta", clear

xtset

***********************
***Conflict Datasets
***********************
* Pattern used throughout: merge on the district-year key without keeping the
* _merge indicator, then print the number of distinct districts and the panel
* declaration as a sanity check.

***UCDP GED
merge 1:1 districtcode year using "processed\UCDP_GED.dta", nogenerate

unique districtcode
xtset

***SIGACTS
merge 1:1 districtcode year using "processed\sigacts.dta", nogenerate

unique districtcode
xtset

merge 1:1 districtcode year using "processed\sigacts_actor.dta", nogenerate

unique districtcode
xtset


*********************
***Opium Datasets
*********************

* District-year cultivation; a missing value from 2001 onwards is set to zero.
merge 1:1 districtcode year using  "processed\opiumcultivation.dta", nogenerate
replace cultivation = 0 if year >= 2001 & cultivation == .

unique districtcode
xtset

* Yields are stored per province and year, hence the many-to-one merge.
merge m:1 province year using "processed\yield.dta", nogenerate

unique districtcode
xtset

* Production = cultivation x yield (kg/ha), divided by 1000
* (to get it in tons rather than kg).
generate production = cultivation * OpiumYieldkgha / 1000
label variable production "Opium production in tons"
* No cultivation and no recorded yield: production is zero rather than missing.
replace production = 0 if cultivation == 0 & OpiumYieldkgha == .

merge 1:1 districtcode year using "processed\erad20062015_processed.dta", nogenerate

unique districtcode
xtset

* Time-invariant district characteristics: one row per districtcode in each
* using file, copied to every year of the district.

merge m:1 districtcode using "processed\NunnPuga_ruggedness.dta", nogenerate
xtset

merge m:1 districtcode using "processed\suitability_rainfed_classes.dta", nogenerate
xtset

merge m:1 districtcode using "processed\suitability_irrigated_classes.dta", nogenerate
xtset

* Class-based crop suitability: flip the sign of both class variables, then
* average them row-wise into suitability_<crop>.
foreach crop in wheat potato rice maize barley {
	replace suitability_ii_`crop' = (-1) * suitability_ii_`crop'
	replace suitability_ir_`crop' = (-1) * suitability_ir_`crop'
	egen suitability_`crop' = rowmean(suitability_ii_`crop' suitability_ir_`crop')
	label variable suitability_`crop' "Crop suitability index (class) for current cultivated land for `crop' - average of rain-fed and irrigated "
}

merge m:1 districtcode using "processed\suitability_rainfed_values.dta", nogenerate
xtset

merge m:1 districtcode using "processed\suitability_irrigated_values.dta", nogenerate
xtset

* Value-based crop suitability (no rice here): row-wise average, no sign flip.
foreach crop in wheat potato maize barley {
	egen suitability_v`crop' = rowmean(suitability_vii_`crop' suitability_vir_`crop')
	label variable suitability_v`crop' "Crop suitability index (values) for current cultivated land for `crop' - average of rain-fed and irrigated "
}

* Opium suitability measures; two of the incoming names are misspelled and
* are corrected right after the merge.
merge m:1 districtcode using "processed\opiumsuitability.dta", nogenerate
rename suitiability_max_opium suitability_max_opium
xtset

merge m:1 districtcode using "processed\opiumsuitability_raster.dta", nogenerate
rename suitabilty_raster_opium suitability_raster_opium
xtset

merge m:1 districtcode using "processed\opiumsuitability_weighted.dta", nogenerate
xtset

merge m:1 districtcode using "processed\opiumsuitability_raster_weighted", nogenerate
xtset

* Same sign flip for the raster-weighted wheat measure.
replace suitability_Rweighted_wheat = (-1) * suitability_Rweighted_wheat
label variable suitability_Rweighted_wheat "Raster weighted (by population) wheat suitability: average of rain-fed and irrigated"

merge m:1 districtcode using "processed\latitute_distances.dta", nogenerate
* Rows without a year are discarded.
drop if year == .
xtset

merge m:1 districtcode using "processed\routedistance.dta", nogenerate
xtset

*****************************************
*Nightlight Data & population & rainfall
*****************************************
* All files in this section are keyed on district-year.

merge 1:1 year districtcode using "processed\nightlight.dta", nogenerate
xtset

merge 1:1 year districtcode using "processed\population.dta", nogenerate
xtset

* Linear interpolation of population over year within each district;
* the result is stored in pop.
bysort districtcode: ipolate population year, gen(pop)

merge 1:1 year districtcode using "processed\PRIO-GRIDrainfall.dta", nogenerate
xtset

merge 1:1 year districtcode using "processed\vhi.dta", nogenerate
xtset

merge 1:1 year districtcode using "processed\vhi_ms.dta", nogenerate
xtset

* Diagnostic: distribution of nightlight (incl. missing) for 2002-2014.
tabulate nightlight if year > 2001 & year < 2015, missing

***************************************************
*Drug Price Data (mean over available countries)
***************************************************

* One yearly file per drug, named <series>new.dta. The forward slash is needed
* because a backslash directly before the macro would block its expansion.
foreach series in crackprice whiteheroinprice Methamphetamineprice cocaineprice amphetamineprice brownheroinprice LSDprice Ecstasyprice {
	merge m:1 year using "processed/`series'new.dta", nogenerate
}


*UNODC opium price

merge m:1 year using "processed\opiumpriceAFG_yearly.dta", nogenerate

* Local opium prices are stored per province and year.
merge m:1 province year using "processed\localopiumpricesbyprovince.dta", nogenerate

*Opioid prescription
merge m:1 year using  "processed\opioidprescription.dta", nogenerate

tabulate nightlight if year > 2001 & year < 2015, missing

********************************
*Commodity Price Data (FAO)
********************************
***Commodity Prices Afghanistan
foreach crop in  wheat barley maize potato rice {
	merge m:1 year using "processed/`crop'price.dta", nogenerate
}

tabulate nightlight if year > 2001 & year < 2015, missing

* For the remaining yearly files, rows that exist only in the using file are
* not kept.

***International Commodity Prices
merge m:1 year using "processed\intcommodityprice.dta", keep(master match) nogenerate

***Deflators
merge m:1 year using "processed\GDPdeflUS.dta", keep(master match) nogenerate

merge m:1 year using "processed\GDPdeflEU.dta", keep(master match) nogenerate

tabulate nightlight if year > 2001 & year < 2015, missing
	
****************
*Military bases 
****************
* camps.dta holds one row per district that ever had a camp, with the first
* opening year (minopened) and the last closing year (maxclosed).
merge m:1 districtcode using "processed\camps.dta"
tab nightlight if year > 2001 & year < 2015, missing
gen anycamp=1 if _merge==3
drop _merge

* Switch the indicator off outside the window [minopened, maxclosed].
* Stata treats missing as larger than any number, so a missing minopened also
* yields 0 here, while a missing maxclosed never switches the indicator off.
replace anycamp=0 if year <minopened 
replace anycamp=0 if year>maxclosed 
*drop no_camps maxclosed minopened
drop maxclosed minopened
replace anycamp=0 if year>=2003 & year<=2006 & district=="garmser" /*the only district where there was no camp in between*/
label var anycamp "At least one military base/camp/facility in district"

merge m:1 districtcode year using "processed\camps_opened.dta"		
drop _merge

merge m:1 districtcode year using "processed\camps_closed.dta"		
drop _merge

* Running totals of bases opened and closed. sum() accumulates in row order, so
* the rows must be in year order within each district. The explicit (year)
* guarantees that; previously the order depended on whatever sort the merges
* above happened to leave behind.
bysort districtcode (year): gen bases_closed_n=sum(bases_closed)
bysort districtcode (year): gen bases_opened_n=sum(bases_opened)

gen bases_count=bases_opened_n-bases_closed_n

drop bases_opened bases_closed bases_closed_n bases_opened_n
label var bases_count "Number of bases that are open and not yet closed within a district"

tab nightlight if year > 2001 & year < 2015, missing

***************
*Trafficking
***************
merge m:1 districtcode using "processed\trafficking.dta", nogenerate

* A missing value in any of these indicators (including anycamp, created
* earlier) is recoded to zero.
foreach flag in Major_Opium_Market Morphine_Lab Sub_Opium_Market Heroin_Processing_Lab Unofficial_Border_Crossing Crystal_Heroin_Lab Main_Roads Secondary_Roads anycamp {
	replace `flag' = 0 if `flag' == .
}

tabulate nightlight if year > 2001 & year < 2015, missing

* Overview of missing values for all variables.
mdesc

*****************
*Ethnic Groups
*****************

merge m:1 districtcode using "processed\ethnicconnections.dta", nogenerate

merge m:1 districtcode using "processed\ethnicgroups_greg_ethno.dta", nogenerate

tabulate year

unique province_alternative district_alternative

* Harmonise two spellings so that the name-based merge below can match them.
replace district_alternative = "warduj" if district_alternative == "warduj "
replace district_alternative = "zanda jan" if district_alternative == "zanda  jan"

* The seven variables taken from Ethnicity_connections.dta.
local ethvars Border_Ethnicicty_IranPakistan Border_Ethnicity_All Pashtuns Taliban_Territory_1996 Dschunbisch_Territory_1996 Hizb_i_Wahclat_Territory_1996 Dschamiat_Territory_1996

* This file is keyed on province/district names, not on districtcode.
* _merge is kept for the diagnostics further down.
merge m:1 province_alternative district_alternative using "processed\Ethnicity_connections.dta", keepusing(`ethvars')

order `ethvars', after(pashtun_ethno)

order districtcode district district_alternative

tabulate _merge if year > 2001 & year < 2015


order `ethvars', after(pashtun_ethno)

mdesc if year > 2001 & year < 2015
tabulate _merge if year > 2001 & year < 2015

tabulate Taliban_Territory_1996 if year > 2001 & year < 2015, missing

summarize Taliban_Territory_1996 if year > 2001 & year < 2015

* Hand-coded values for two districts, each identified by name and code.
local warduj   district=="warduj" & districtcode==1125
local zindajan district=="zinda  jan" & districtcode==2010

replace Border_Ethnicicty_IranPakistan = 0 if `warduj'
replace Border_Ethnicicty_IranPakistan = 1 if `zindajan'
replace Border_Ethnicity_All = 1 if (`zindajan') | (`warduj')
replace Pashtuns = 0 if `warduj'
replace Pashtuns = 1 if `zindajan'
replace Taliban_Territory_1996 = 0 if `warduj'
replace Taliban_Territory_1996 = 1 if `zindajan'
replace Dschunbisch_Territory_1996 = 0 if (`zindajan') | (`warduj')
replace Hizb_i_Wahclat_Territory_1996 = 0 if (`zindajan') | (`warduj')
replace Dschamiat_Territory_1996 = 1 if `warduj'
replace Dschamiat_Territory_1996 = 0 if `zindajan'

* Rows that exist only in the ethnicity file are not part of the panel.
drop if _merge == 2

tabulate Taliban_Territory_1996 if year > 2001 & year < 2015, missing

drop _merge

drop if missing(districtcode)

unique districtcode

summarize Taliban_Territory_1996 if year > 2001 & year < 2015


merge m:1 districtcode using "NRVA2003_languages.dta", nogenerate

* Housekeeping after the merges: labels, unused identifiers, name clean-up.
label variable year "Year"
drop OBJECTID provid country code


label variable deaths_civilians "The best estimate of dead civilians - for one- and two-sided violence"
label variable deaths_unknown "The  best  estimate  of  deaths  of  per sons  of  unknown status."
label variable best_est "The best (most likely) estimate of total fatalities"
label variable high_est "The highest reliable estimate of total fatalities"
label variable low_est "The lowest reliable estimate of total fatalities"
label variable pop "Interpolated population from PRIO GRID 2000, 2005, 2010, 2015"
label variable prec_gpcc "Rainfall"


* Strip trailing blanks from the alternative district name, then harmonise the
* double-blank spelling of "zanda jan".
replace district_alternative = strrtrim(district_alternative), nopromote
replace district_alternative = "zanda jan" if district_alternative == "zanda  jan"
* NOTE(review): the next statement replaces "warduj" with "warduj" and so changes
* nothing; kept unchanged to preserve the original behaviour.
replace district_alternative = "warduj" if district_alternative == "warduj"


drop if year == .



/*******************************************************************************
*                              Further Data Prep                               *
*******************************************************************************/

* tabulate also creates one dummy per category: time*, dprov*, ddis*.
tabulate year, generate(time)
tabulate province, generate(dprov)
tabulate district, generate(ddis)

* Consecutive numeric ids for provinces and districts.
egen province_id = group(province)
egen id = group(districtcode)
xtset


*add new sigacts for revision
merge 1:1 districtcode year using "processed\sigacts_actor.dta", nogenerate

unique district_alternative
unique districtcode


*district-specific trend
* Row counter within district, with rows ordered by year.
bysort districtcode (year): generate trend = _n

*Conflict

*SIGACTS
* anysigact is defined (0) for 2002-2014 and switched to 1 wherever any of the
* three event counts is positive and non-missing.
gen anysigact=0 if year>=2002 & year<=2014
replace anysigact=1 if df>0 & df!=. 		
replace anysigact=1 if idf>0 & idf!=. 		
replace anysigact=1 if ied_explosion>0 & ied_explosion!=. 		
label var anysigact "If either DF, IDF or IED_Explosion is larger 0 and not missing"



*replace conflict indicators with zero if missing (for the period of observation)
* (best_est was listed twice in the original list; the duplicate had no effect
* and has been removed.)
foreach X in best_est deaths_civilians deaths_unknown high_est low_est deaths_civilians_statebased ///
deaths_civilians_nonstate deaths_civilians_onsesided deaths_unknown_statebased deaths_unknown_nonstate deaths_unknown_onsesided ///
best_est_statebased best_est_nonstate best_est_onsesided high_est_statebased high_est_nonstate high_est_onsesided low_est_statebased ///
low_est_nonstate low_est_onsesided best_est_tc_onesided civilians_est_tc_onesided best_est_tg tbest_est_tg gbest_est_tg best_est_ng best_est_other best_est_th best_est_USA_AQ {
	replace `X'=0 if `X'==. & year>=1989 & year<=2014 
}


* Keep an untouched copy (<var>_miss) of each SIGACTS count before zero-filling
* the missing values for 2002-2014.
foreach X of varlist df idf ied_explosion total_b3event casualty_b3event coal_b3involve afghan_b3involve {
	gen `X'_miss=`X'
	replace `X'=0 if `X'==. & year>=2002 & year<=2014
}

gen anysigact2=0 if year>=2002 & year<=2014
replace anysigact2=1 if total_b3event>0 & total_b3event!=.


* Threshold dummies on the best fatality estimate.
* Stata treats a missing value as larger than any number, so an unguarded
* "best_est>=25" is true when best_est is missing. best_est is only zero-filled
* for 1989-2014, so rows outside that window with no UCDP record were
* previously coded as conflict/war. They are now left missing, in line with the
* intensity variables below, which already guard against missing values.
gen conflict=best_est>=25 if !missing(best_est)
gen war=best_est>=100 if !missing(best_est)

gen smallconflict=best_est>=5 if !missing(best_est)

gen lowconflict=best_est>=10 if !missing(best_est)

* Three-level intensity: 0 = no deaths, 1 = up to the cut-off, 2 = above it.
gen intensity=0 if best_est==0
replace intensity=1 if best_est>0 & best_est<=25 & best_est!=.
replace intensity=2 if best_est>25  & best_est!=.

gen intensity1=0 if best_est==0
replace intensity1=1 if best_est>0 & best_est<=13 & best_est!=.
replace intensity1=2 if best_est>13  & best_est!=.

*gen onset
* <X>onset: 1 in a year with X==1 that follows a year with X==0 in the same
* district; missing while X stays at 1; 0 in all other rows.
* l<X>onset is its one-year lag.
foreach X of varlist smallconflict lowconflict conflict war {
	generate `X'onset = 0
	bysort districtcode (year): replace `X'onset = 1 if `X' == 1 & `X'[_n-1] == 0
	by districtcode: replace `X'onset = . if `X' == 1 & `X'[_n-1] == 1
	generate l`X'onset = L.`X'onset

}

*gen ending
* Mirror image: 1 in a year with X==0 that follows a year with X==1; missing
* while X stays at 0; 0 in all other rows.
foreach X of varlist smallconflict lowconflict conflict war {
	generate `X'end = 0
	bysort districtcode (year): replace `X'end = 1 if `X' == 0 & `X'[_n-1] == 1
	by districtcode: replace `X'end = . if `X' == 0 & `X'[_n-1] == 0
	generate l`X'end = L.`X'end

}

*Prices
* Tag the international crop prices as current-price series.
foreach crop in barley maize rice wheat {
	rename `crop'_priceint `crop'_priceint_cur
}

* Shorter names for three drug series; both the ...pricen and the ...price_cur
* variable of each series are renamed.
local longnames  Methamphetamine LSD Ecstasy
local shortnames meth lsd ecstasy
forvalues k = 1/3 {
	local from : word `k' of `longnames'
	local to   : word `k' of `shortnames'
	rename `from'pricen `to'price
	rename `from'price_cur `to'price_cur
}



*need to adjust from current to constant, USD, base year 2010 for US
foreach series in barley_priceint maize_priceint rice_priceint wheat_priceint {
	generate `series' = `series'_cur * 100 / GDPdeflUS
	label variable `series' "`series'price in constant 2010 USD, per metric ton"
}

rename brownheroinpricen heroinprice
rename brownheroinprice_cur heroinprice_cur
rename crackpricen crackprice
rename cocainepricen cocaineprice
rename amphetaminepricen amphetamineprice

* <drug>price1: the current-price series divided by the EU GDP deflator
* (rather than using CPI for EU).
foreach drug in crackprice methprice cocaineprice amphetamineprice lsdprice ecstasyprice heroinprice whiteheroinprice   {
	generate `drug'1 = `drug'_cur * 100 / GDPdeflEU
	label variable `drug'1 "`drug' in constant 2010 EU, per g"
}

*Suitability
* Negative values are first set to zero (robustness test: replace with missing);
* all zeros are then replaced by the smallest non-zero value, which is also
* kept in min<var>.
* NOTE(review): the rename further down refers to suitability_weighted_opium.
* If no variable is literally called suitability_weighted, this loop relies on
* Stata's variable-name abbreviation - confirm.
foreach var in suitability_weighted suitability_Rweighted_opium {
	replace `var' = 0 if `var' < 0
	summarize `var' if `var' != 0
	generate min`var' = r(min)
	replace `var' = min`var' if `var' == 0
}


* These are the two districts for which we could not derive population
* weighted measures because population is too low there - for
* suitability_Rweighted_wheat we have to set it in analogy to
* suitability_Rweighted_opium.
* 8 was the category indicating no suitability, i.e. the minimum - we
* multiplied it by (-1) to ease interpretation.
replace suitability_Rweighted_wheat = -8 if districtcode == 1106 | districtcode == 1107

rename suitability_weighted_opium suitability_w_opium
rename suitability_Rweighted_opium suitability_rw_opium
rename suitability_Rweighted_wheat suitability_rw_wheat


***Normalize suitability between zero and one
* Min-max scaling over all rows: (x - min) / (max - min).
foreach var in suitability_barley suitability_maize suitability_rice suitability_wheat suitability_potato suitability_opium suitability_raster_opium ///
suitability_vbarley suitability_vmaize suitability_vwheat suitability_vpotato suitability_max_opium suitability_w_opium suitability_rw_opium suitability_rw_wheat {
	quietly summarize `var'
	replace `var' = (`var' - `r(min)') / (`r(max)'-`r(min)')
}

*Shocks
*Gen agricultural shock variables by using international prices and suitability measures


*All drug prices are per gram
replace opiumpriceAFG = opiumpriceAFG / 1000
label variable opiumpriceAFG "Fresh opium farm-gate prices at harvest time in Afghanistan in const.2010 EU/g"

* <price>norm: min-max scaled copy of each drug price.
foreach var in  crackprice methprice cocaineprice amphetamineprice heroinprice lsdprice ecstasyprice opiumpriceAFG {
	quietly summarize `var'
	generate `var'norm = (`var' - `r(min)') / (`r(max)'-`r(min)')
}

rename wheat_priceint wheatprice
rename maize_priceint maizeprice
rename barley_priceint barleyprice
rename rice_priceint riceprice

* Same scaling for the crop prices.
foreach var in barleyprice maizeprice riceprice wheatprice {
	quietly summarize `var'
	generate `var'norm = (`var' - `r(min)') / (`r(max)'-`r(min)')
}


* Logs of the raw crop prices ...
foreach crop in barley maize rice wheat {
	generate ln`crop'price = log(`crop'price)
}

* ... and of the scaled prices; 0.01 is added because the scaled series reach zero.
foreach crop in barley maize rice wheat {
	generate ln`crop'pricenorm = log(`crop'pricenorm + 0.01)
}

* Wheat shocks: price measure times suitability measure.
generate lnwheat_rw_shock_notnorm = lnwheatprice * suitability_rw_wheat
generate lnwheat_rw_shock = lnwheatpricenorm * suitability_rw_wheat
generate wheat_rw_shock = wheatpricenorm * suitability_rw_wheat
generate lnwheat_shock = lnwheatpricenorm * suitability_wheat


* Row-wise mean of the cocaine, amphetamine and ecstasy prices (raw and scaled).
egen complementprice = rowmean(cocaineprice amphetamineprice ecstasyprice)
egen complementpricenorm = rowmean(cocainepricenorm amphetaminepricenorm ecstasypricenorm)


* Put "norm" in front of "price" so that all series end in "...price".
rename complementpricenorm complementnormprice
rename heroinpricenorm heroinnormprice
rename opiumpriceAFGnorm opiumAFGnormprice
rename cocainepricenorm cocainenormprice
rename opiumpriceAFG opiumAFGprice

*Create deviation from mean as price shock variable
* For every price series X:
*   mX   = mean of X over all rows
*   devX = deviation of X from that mean
*   cX   = change of X relative to the previous year
*   lnX  = log(X + 0.01)
foreach X in crackprice methprice cocaineprice  amphetamineprice lsdprice ecstasyprice ///
complementprice heroinprice opiumAFGprice complementnormprice heroinnormprice cocainenormprice opiumAFGnormprice  {
	sum `X'
	gen m`X'=r(mean)
	gen dev`X'=`X'-m`X'
	* The difference operator respects the xtset district-year panel. The
	* previous "`X'-`X'[_n-1]" used the physically preceding row, so the first
	* year of each district was differenced against the last row of the
	* preceding district. With D. that first year is missing.
	gen c`X'=D.`X'
	gen ln`X'=log(`X'+0.01)
}



* Opium shocks: each price measure (level, deviation from mean, log) times
* opium suitability, once with the raster measure and once with the
* raster-weighted (rw) measure.
foreach X in crack meth cocaine amphetamine lsd ecstasy complement heroin complementnorm heroinnorm opiumAFG opiumAFGnorm cocainenorm {
	generate opiumshock_`X' = `X'price * suitability_raster_opium
	generate opiumshock_`X'1 = dev`X'price * suitability_raster_opium
	generate opiumshock_rw_`X' = `X'price * suitability_rw_opium
	generate opiumshock_rw_`X'1 = dev`X'price * suitability_rw_opium
	generate opiumshock_ln`X' = ln`X'price * suitability_raster_opium
	generate opiumshock_rw_ln`X' = ln`X'price * suitability_rw_opium
}

* First and second lag of every shock variable (the opiumAFG series is not in
* this list). For each shock the l-version is created first, then the l2-version.
foreach X in crack meth cocaine amphetamine lsd ecstasy complement heroin complementnorm heroinnorm opiumAFGnorm cocainenorm {
	foreach shock in opiumshock_`X' opiumshock_`X'1 opiumshock_rw_`X' opiumshock_rw_`X'1 opiumshock_ln`X' opiumshock_rw_ln`X' {
		bysort districtcode: generate l`shock' = L.`shock'
		bysort districtcode: generate l2`shock' = L2.`shock'
	}
}

* Lagged log heroin prices (raw and scaled).
generate llnheroinprice = L.lnheroinprice
generate llnheroinnormprice = L.lnheroinnormprice

*Logs and Lags

* log(x+1) of the fatality counts.
foreach X in best_est best_est_tc_onesided best_est_tg gbest_est_tg tbest_est_tg ///
best_est_USA_AQ best_est_th best_est_other best_est_statebased best_est_onsesided best_est_nonstate {
	gen ln`X'=log(`X'+1) 
}

* log(x+1) of the SIGACTS counts, defined for 2002-2014 only.
foreach X in df idf ied_explosion {
	gen ln`X'=log(`X'+1) if year>=2002 & year<=2014
}

gen lncultivation=log(cultivation+1) if year>=2001 & year<=2014

* Shorter "brd" names for the main fatality variables.
rename lnbest_est lnbrd
rename best_est_tc_onesided brd_talebciv
rename lnbest_est_tc_onesided lnbrd_talebciv
rename lnbest_est_tg lnbrd_talebgov
rename best_est_tg brd_talebgov
rename lngbest_est_tg lnbrd_gov_talebgov
rename lntbest_est_tg lnbrd_tal_talebgov
label var lnbrd "(log) Battle-related deaths, best estimate"
label var lnbrd_talebciv "(log) Battle-related deaths, best estimate, one-sided violence by Taliban" 
label var lnbrd_talebgov "(log) BRD, statebased violence, Government-Taliban"
* Both variables below are log(x+1) transforms (see the loop above); their
* labels now say so, like the sibling labels.
label var lnbrd_gov_talebgov "(log) BRD of Government forces, statebased violence, Government-Taliban"
label var lnbrd_tal_talebgov "(log) BRD of Taliban, statebased violence, Government-Taliban"


label var lncultivation "(log) Opium cultivation"

* Alternative log transform with a much smaller offset.
gen lnbrd1=log(best_est+0.001)


* Revenue proxies: price times cultivation, or price times production.
gen revenue_wmp=heroinprice*cultivation
gen lnrevenue_wmp=log(revenue_wmp+1)
gen revenue_lp=dryopium_price*cultivation
gen lnrevenue_lp=log(revenue_lp+1)	
gen revenue=opiumAFGprice*1000*1000*production /*price is per gram*//*from g to tons*/
gen lnrevenue=log(revenue+1)


gen lnproduction=log(production+1)

label var revenue_wmp "Revenue using world-market prices for  heroin"
label var lnrevenue_wmp "(log) Revenue using world-market prices for  heroin"
label var revenue_lp "Revenue using local price for Dry Opium Price by Trader in constant 2010 EU/kg"
label var lnrevenue_lp "(log) Revenue using local price for Dry Opium Price by Trader in constant 2010 EU/kg "
label var revenue "Revenue using local price for Fresh opium farm-gate prices at harvest time in Afghanistan in const. 2010 EU/kg"
label var lnrevenue "(log) Revenue using local price for Fresh opium farm-gate prices at harvest time in Afghanistan in const.2010 EU/kg"
	
	
* Rescale the city distances by 1/1000 (to get km).
foreach city in Kabul Hirat Mazari_Sharif Jalalabad Kunduz Kandahar {
	replace dist_d_`city' = dist_d_`city' / 1000
}

* Smallest distance to one of five cities (Jalalabad is not part of the list).
egen mindist_d_city = rowmin(dist_d_Kabul dist_d_Hirat dist_d_Mazari_Sharif dist_d_Kunduz dist_d_Kandahar)
label variable mindist_d_city "Minimum distance to one ot the 5 biggest cities in km: Kabul, Hirat, Sharif, Kunduz, Kandahar"
generate proximity_Kabul = -dist_d_Kabul

* Empty placeholder variables.
generate wheat_shock = .
generate opium_shock = .
generate opium_X = .
xtset

*Create Covariates

label variable pop "Population"
generate lnpop = log(pop + 1)
label variable lnpop "(Log) Population"

* Log luminosity, and its 2001 value copied to every year of the district
* (mean over the single non-missing 2001 row).
generate log_nightlight = log(nightlight + 0.001)
generate In_nightlight_temp = log_nightlight if year == 2001
bysort districtcode: egen In_log_nightlight = mean(In_nightlight_temp)
drop In_nightlight_temp
label variable In_log_nightlight "log Initital luminosity (2001)"
label variable log_nightlight "log luminosity"

* Same construction for log population in 2001.
generate In_log_pop_temp = lnpop if year == 2001
bysort districtcode: egen In_log_pop = mean(In_log_pop_temp)
drop In_log_pop_temp
label variable In_log_pop "Initial district population, GPW (2001)"

* Ruggedness quantile groups: halves, terciles, quartiles, quintiles.
forvalues q = 2/5 {
	xtile ruggedness_`q'q = ruggedness, nquantiles(`q')
}

* Any processing lab / any opium market in the district.
generate lab = (Morphine_Lab == 1 | Crystal_Heroin_Lab == 1 | Heroin_Processing_Lab == 1)
generate market = (Major_Opium_Market == 1 | Sub_Opium_Market == 1)
rename Unofficial_Border_Crossing bordercrossing

***NEW ethnicity data: compare greg and ethnologue
*pashtun_greg is from same source as pashtuns

* Pashtun is the only recorded group (GREG / Ethnologue version).
generate onlypashtun = (pashtun_greg == 1 & no_ethnic_greg == 1)
generate onlypashtun_ethno = (pashtun_ethno == 1 & no_ethnic_ethno == 1)

rename Border_Ethnicicty_IranPakistan borderethnic_ip
rename Border_Ethnicity_All borderethnic_all
rename Pashtuns pashtuns
rename Taliban_Territory_1996 taliban1996

* Complements of the two 0/1 indicators; any other value stays missing.
generate nopashtuns = 1 - pashtuns if inlist(pashtuns, 0, 1)
generate notaliban1996 = 1 - taliban1996 if inlist(taliban1996, 0, 1)

* Pashtun present but not the only group.
generate mixedgroups1 = pashtun_greg - onlypashtun

* More than one group recorded (a missing count also satisfies ">1" in Stata).
generate mixedgroups2 = (no_ethnic_greg > 1)

generate mixedgroups3 = (no_ethnic_greg > 1 & no_ethnic_ethno > 1)

generate onlyonegroup = (no_ethnic_greg == 1 & no_ethnic_ethno == 1)

* Diagnostics for the 1996 territory variables, 2002-2014.
summarize Dschunbisch_Territory_1996 if year > 2001 & year < 2015
summarize Hizb_i_Wahclat_Territory_1996 if year > 2001 & year < 2015
summarize Dschamiat_Territory_1996 if year > 2001 & year < 2015


tabulate Dschunbisch_Territory_1996 if year > 2001 & year < 2015, missing
tabulate taliban1996 if year > 2001 & year < 2015, missing
tabulate Hizb_i_Wahclat_Territory_1996 if year > 2001 & year < 2015, missing
tabulate Dschamiat_Territory_1996 if year > 2001 & year < 2015, missing

* Taliban territory in 1996 that is also coded as territory of at least one
* of the other three groups.
generate mixedterritory = (taliban1996 == 1 & (Dschunbisch_Territory_1996 == 1 | Hizb_i_Wahclat_Territory_1996 == 1 | Dschamiat_Territory_1996 == 1))

summarize mixedterritory if year > 2001 & year < 2015
tabulate mixedterritory if year > 2001 & year < 2015, missing

* Taliban territory in 1996 and not coded 1 for any of the other three groups.
generate onlytaliban1996 = (taliban1996 == 1 & Dschunbisch_Territory_1996 != 1 & Hizb_i_Wahclat_Territory_1996 != 1 & Dschamiat_Territory_1996 != 1)


* Rescale the route measures by 1/1000 (the labels below give them in km).
foreach route of varlist total_length total_slength snapping_distance {
	replace `route' = `route' / 1000
}

label variable total_length "2D length of the route in km"
label variable total_slength "3D length of the route in km"
label variable snapping_distance "Distance to road network of the district centroid in km"

* Simple 0/1 indicators.
generate roads = (Main_Roads == 1 | Secondary_Roads == 1)

generate southern = (province == "kandahar" | province == "hilmand")

generate anycamp2 = (bases_count >= 1)


* Covariates interacted with the district trend and with the year dummies.
local ctrls ruggedness* dist_d_Kabul total_length total_slength total_time_2 total_stime_2 pashtun_greg mixedgroups2 mixedterritory taliban1996 no_ethnic_greg Ethnic_Connections

foreach X of varlist `ctrls' {
	generate trend_`X' = trend * `X'
}

* Only time1-time26 are used here.
foreach Y of varlist time1-time26 {
	foreach X of varlist `ctrls' {
		generate `Y'_`X' = `Y' * `X'
	}
}
		
		
		
/*******************************************************************************
*                                 Market Access                                *
*******************************************************************************/

* Park the panel built so far; it is reloaded once the centroid file is ready.
save "processed\GLK_final_analysis_temp.dta", replace

summarize total_stime_2 if year > 2001 & year < 2015

summarize lab if year > 2001 & year < 2015

* Generate market access

* District centroid file: drop the columns that are not used below.
import delimited using "district_centroid.csv", clear
drop objectid objectid_1 orig_fid point_x point_y near_fid near_dist near_x near_y near_angle sourceid sourceoid posalong sideofedge snapx snapy

tabulate distid

* Align the identifier names with the main panel.
rename objectid_12 OBJECTID_1
rename prov_34_na province_alternative
rename dist_34_na district_alternative
rename distid districtcode

label var OBJECTID_1 "Object ID generated in ArcGIS"
label var distance "Distance of Centroid to road Snaped in ArcGIS"
label var labs_amount "Total amount of labs in district"
label var total_market_labs "Sum of all Markets and labs in the district"
label var total_market_labs_weighted "Sum of all Markets and labs in the district. (2*Labs+1*major_Markets+.5*Sub_Markets)"
label var market_dummy "Is there a market in the district?"
label var total_markets_weighted "Sum of all markets in the district. (Major_Markets+0.5*Sub_Markets)"
label var total_markets "Sum of all markets in the district."
label var major_opium_market_amount "Number of major markets in the district"
label var sub_opium_markets_amount "Number of sub markets in the district"
label var morphine_labs_amount "Number of morphine labs in the district"
label var heroin_processing_labs_amount "Number of heroin labs in the district"
label var provid "Province ID"
label var districtcode "District ID"

* Lower-case names (plus one spelling fix) so they can serve as merge keys.
replace province_alternative = lower(province_alternative)
replace district_alternative = lower(district_alternative)
replace district_alternative = "zanda jan" if district_alternative == "zanda  jan"

* At least one morphine or heroin processing lab in the district.
generate anylab = (morphine_labs_amount >= 1 & morphine_labs_amount != .) | (heroin_processing_labs_amount >= 1 & heroin_processing_labs_amount != .)


save "processed\district398_cent_final_short.dta", replace

	
* Reload the panel and attach the centroid-level market information by
* province/district name.
use "processed\GLK_final_analysis_temp.dta", clear
unique districtcode
unique district_alternative
unique OBJECTID

merge m:1 province_alternative district_alternative using "processed\district398_cent_final_short.dta"
* Drop rows that exist only in the centroid file.
keep if inlist(_merge, 1, 3)

xtset

summarize anylab if year > 2001 & year < 2015

unique OBJECTID_1
unique districtcode
tabulate districtcode, missing
unique OBJECTID
tabulate OBJECTID, missing

* Variables averaged over years, split around total_markets_weighted so that
* the original variable order is kept.
local head labs_amount total_market_labs_weighted total_market_labs heroin_processing_labs_amount morphine_labs_amount sub_opium_markets_amount ///
major_opium_market_amount total_markets market_dummy
local tail nightlight ///
log_nightlight In_log_nightlight population pop lnpop In_log_pop OBJECTID OBJECTID_1 provid distance Major_Opium_Market Sub_Opium_Market Morphine_Lab Heroin_Processing_Lab Crystal_Heroin_Lab anylab

* One row per district: the mean over all years of every listed variable.
collapse (mean) `head' total_markets_w `tail', ///
by(province district districtcode)


keep `head' total_markets_weighted districtcode province district `tail'

unique districtcode
sort districtcode

save "processed\merged_file_marketaccess.dta", replace //unique observations are defined by districtcode


*MODIFYING centroid_to_centroid_matrix
* We now switch to the Centroid_to_Centroid_Matrix. This matrix contains eight measures for the distance between all 398 districts.
* 1st We rename some of these measures to facilitate comprehension later on.
* 2nd Distances which are zero (meaning a district's distance to itself) are set to missing here; missing lengths are set to 1
*     further below, because the distances are used as a divisor later on.
* 3rd We reshape it from the long format to a wide format to create a "398to398" matrix (in fact one 398to398 matrix per measure kept).
clear
set maxvar 30000

import delimited using "Centroid_to_Centroid_Matrix.txt", clear
drop total_stime_1 total_stime_3 total_time_1 total_time_2 total_time_3
*for testing purposes we have dropped some of the measuring methods here
* The trailing underscore separates the stub from the destination id that
* reshape appends below.
rename total_stime_2 total_stime_2_
drop objectid name destinationrank
rename originid OBJECTID_1
label variable  OBJECTID_1 "District ID from ArcGis"
replace total_length=. if total_length==0
replace total_slength=. if total_slength==0
* Uses the renamed variable explicitly. The previous "replace total_stime_2=."
* only worked through variable-name abbreviation and would fail under
* "set varabbrev off".
replace total_stime_2_=. if total_stime_2_==0

*this reshape is done to be able to merge Centroid_to_Centroid_Matrix.txt and district_centroid.csv
reshape wide total_length total_slength total_stime_2_, i(OBJECTID_1) j(destinationid)

* We merge the centroid_to_centroid_matrix with previously created merged_file
merge m:1 OBJECTID_1 using "processed\merged_file_marketaccess.dta", keep(1 3)
drop _merge

*CREATING the market access variables (distance measure: total_length)
* 1st We rescale both length measures and set missing distances (zero distances were recoded to missing
*     before the reshape) to 1, because the distances are used as a divisor.
* 2nd We define the list of weighting variables ("bevolkerung"), the distance exponent ("teta") and a counter.
* 3rd For every weighting variable and every destination district i we compute
*         MA_<count>_<i> = total_length<i>^(-teta) * (value of the weighting variable in district i)
* 4th We add these terms up over all destinations, giving one Market Access variable per weighting variable.
* 5th We drop the distance columns used in the calculation.

foreach X of varlist  total_length* total_slength* {
	* to get it in 10km
	replace `X' = `X'/10000 if `X' != .
	replace `X' = 1 if `X' == .
}


local bevolkerung pop nightlight   lnpop log_nightlight Major_Opium_Market major_opium_market_amount Sub_Opium_Market ///
sub_opium_markets_amount labs_amount total_markets market_dummy total_markets_weighted total_market_labs_weighted total_market_labs 

local teta 1
* count indexes the weighting variable; it runs from 1 to 14 here and is left at 15 for the slength loop below.
local count = 1
* Row number; no longer used for the lookup in this section but kept unchanged because it stays in the saved dataset.
gen number=[_n]


foreach var in `bevolkerung' {
	forvalues i = 1(1)398{
		* Value of the weighting variable in destination district i. The district is identified by its ID
		* (the reshape above used the destination id as column suffix and OBJECTID_1 as row id) rather than
		* by its row position, so the result does not depend on the sort order or on gaps in the ids.
		quietly summarize `var' if OBJECTID_1 == `i', meanonly
		scalar MeanX = r(mean)
		* scalar() makes sure the scalar is used even if a variable name could abbreviate to MeanX.
		generate MA_`count'_`i' = (total_length`i'^(-`teta'))*scalar(MeanX)
		label var MA_`count'_`i' "Market Access `var'"
	}
local count = `count'+1
}


* Row totals over all 398 destinations; rowtotal treats missing terms as zero.
egen MA_pop_2D = rowtotal(MA_1_*)
label variable MA_pop_2D "Market Access using pop as population and total_length as distance"

egen MA_nightlight_2D = rowtotal(MA_2_*)
label variable MA_nightlight_2D "Market Access using nightlight as population and total_length as distance"

egen MA_lnpop_2D = rowtotal(MA_3_*)
label variable MA_lnpop_2D "Market Access using log population and total_length as distance"

egen MA_lognightlight_2D = rowtotal(MA_4_*)
label variable MA_lognightlight_2D "Market Access using log nightlight as population and total_length as distance"

egen MA_Major_Opium_Market_2D = rowtotal(MA_5_*)
label variable MA_Major_Opium_Market_2D "Market Access using Major_Opium_Market as population and total_length as distance"

egen MA_major_opium_market_amount_2D = rowtotal(MA_6_*)
label variable MA_major_opium_market_amount_2D "Market Access using major_opium_market_amount as population and total_length as distance"

egen MA_Sub_Opium_Market_2D = rowtotal(MA_7_*)
label variable MA_Sub_Opium_Market_2D "Market Access using Sub_Opium_Market as population and total_length as distance"

egen MA_sub_opium_markets_amount_2D  = rowtotal(MA_8_*)
label variable MA_sub_opium_markets_amount_2D "Market Access using sub_opium_markets_amount as population and total_length as distance"

egen MA_labs_amount_2D = rowtotal(MA_9_*)
label variable MA_labs_amount_2D "Market Access using labs_amount as population and total_length as distance"

egen MA_total_markets_2D = rowtotal(MA_10_*)
label variable MA_total_markets_2D "Market Access using total_markets as population and total_length as distance"

egen MA_market_dummy_2D = rowtotal(MA_11_*)
label variable MA_market_dummy_2D "Market Access using market_dummy as population and total_length as distance"

egen MA_total_markets_weighted_2D = rowtotal(MA_12_*)
label variable MA_total_markets_weighted_2D "Market Access using total_markets_weighted as population and total_length as distance"

egen MA_total_market_labs_weighted_2D = rowtotal(MA_13_*)
label variable MA_total_market_labs_weighted_2D "Market Access using total_market_labs_weighted as population and total_length as distance"

egen MA_total_market_labs_2D = rowtotal(MA_14_*)
label variable MA_total_market_labs_2D "Market Access using total_market_labs as population and total_length as distance"

* The pattern only matches the total_length columns; the total_slength columns are still needed below.
drop total_length*


* Market access using total_slength as the distance measure.
* Same construction as for total_length: for every weighting variable and destination district i,
*     MA_<count>_<i> = total_slength<i>^(-teta) * (value of the weighting variable in district i).
* The counter "count" continues from the total_length section, so the terms are numbered 15 to 28 here.
local bevolkerung pop nightlight   lnpop log_nightlight Major_Opium_Market major_opium_market_amount Sub_Opium_Market ///
sub_opium_markets_amount labs_amount total_markets market_dummy total_markets_weighted total_market_labs_weighted total_market_labs 
local teta 1


foreach var in `bevolkerung' {
	forvalues i = 1(1)398{
		* Value of the weighting variable in destination district i, identified by the district ID
		* (OBJECTID_1, the id space of the destination suffix) rather than by row position.
		quietly summarize `var' if OBJECTID_1 == `i', meanonly
		scalar MeanX = r(mean)
		* scalar() makes sure the scalar is used even if a variable name could abbreviate to MeanX.
		generate MA_`count'_`i' = (total_slength`i'^(-`teta'))*scalar(MeanX)
		label var MA_`count'_`i' "Market Access `var'"
	}
local count = `count'+1
}


* Row totals over all destinations; rowtotal treats missing terms as zero.
egen MA_pop_3D = rowtotal(MA_15_*)
label variable MA_pop_3D "Market Access using pop as population and slength as distance"

egen MA_nightlight_3D = rowtotal(MA_16_*)
label variable MA_nightlight_3D "Market Access using nightlight as population and slength as distance"

egen MA_lnpop_3D = rowtotal(MA_17_*)
label variable MA_lnpop_3D "Market Access using lnpop as population and slength as distance"

egen MA_lognightlight_3D = rowtotal(MA_18_*)
label variable MA_lognightlight_3D "Market Access using log nightlight as population and slength as distance"

egen MA_Major_Opium_Market_3D = rowtotal(MA_19_*)
label variable MA_Major_Opium_Market_3D "Market Access using Major_Opium_Market as population and slength as distance"

egen MA_major_opium_market_amount_3D = rowtotal(MA_20_*)
label variable MA_major_opium_market_amount_3D "Market Access using major_opium_market_amount as population and slength as distance"

egen MA_Sub_Opium_Market_3D = rowtotal(MA_21_*)
label variable MA_Sub_Opium_Market_3D "Market Access using Sub_Opium_Market as population and slength as distance"

egen MA_sub_opium_markets_amount_3D  = rowtotal(MA_22_*)
label variable MA_sub_opium_markets_amount_3D "Market Access using sub_opium_markets_amount as population and slength as distance"

egen MA_labs_amount_3D = rowtotal(MA_23_*)
label variable MA_labs_amount_3D "Market Access using labs_amount as population and slength as distance"

egen MA_total_markets_3D = rowtotal(MA_24_*)
label variable MA_total_markets_3D "Market Access using total_markets as population and slength as distance"

egen MA_market_dummy_3D = rowtotal(MA_25_*)
label variable MA_market_dummy_3D "Market Access using market_dummy as population and slength as distance"

egen MA_total_markets_weighted_3D = rowtotal(MA_26_*)
label variable MA_total_markets_weighted_3D "Market Access using total_markets_weighted as population and slength as distance"

egen MA_total_market_labs_weighted_3D = rowtotal(MA_27_*)
label variable MA_total_market_labs_weighted_3D "Market Access using total_market_labs_weighted as population and slength as distance"

egen MA_total_market_labs_3D = rowtotal(MA_28_*)
label variable MA_total_market_labs_3D "Market Access using total_market_labs as population and slength as distance"

drop total_slength* 

* Tidy up the market access file: drop the raw inputs, shorten the variable names, keep the travel
* times to five cities, remove the pairwise terms and save one row per district.

* Raw district characteristics that are no longer needed in this file
drop nightlight population pop lnpop log_nightlight In_log_nightlight In_log_pop ///
	Major_Opium_Market Sub_Opium_Market Morphine_Lab Heroin_Processing_Lab Crystal_Heroin_Lab ///
	provid distance

* Short names for the market and lab counts
rename (sub_opium_markets_amount major_opium_market_amount total_markets_weighted ///
	morphine_labs_amount heroin_processing_labs_amount total_market_labs_weighted) ///
	(submarkets_n majormarket_n total_markets_w ///
	morphine_labs_n heroinprocess_labs_n totmarketlabs_w)

* Short names for the total_length ("2d") market access measures
rename (MA_Major_Opium_Market_2D MA_major_opium_market_amount_2D MA_Sub_Opium_Market_2D ///
	MA_sub_opium_markets_amount_2D MA_labs_amount_2D MA_total_markets_2D ///
	MA_total_markets_weighted_2D MA_total_market_labs_weighted_2D MA_total_market_labs_2D ///
	MA_market_dummy_2D MA_pop_2D MA_nightlight_2D MA_lnpop_2D) ///
	(ma_majormarket_2d ma_majormarket_2d_n ma_submarket_2d ///
	ma_submarket_2d_n ma_labs_amount_2d ma_totmarkets_2d ///
	ma_totmarkets_2d_w ma_totmarketlabs_2d_w ma_totmarketlabs_2d ///
	ma_market_dummy_2d ma_pop_2d ma_nightlight_2d ma_lnpop_2d)

* Short names for the total_slength ("3d") market access measures.
* NOTE(review): "ma_nighlight_3d" and "ma_totmarkets_3D" are spelled differently from their 2d
* counterparts. They are reproduced exactly because other files may refer to these names.
rename (MA_pop_3D MA_nightlight_3D MA_lnpop_3D MA_Major_Opium_Market_3D ///
	MA_major_opium_market_amount_3D MA_Sub_Opium_Market_3D MA_sub_opium_markets_amount_3D ///
	MA_labs_amount_3D MA_total_markets_3D MA_market_dummy_3D MA_total_markets_weighted_3D ///
	MA_total_market_labs_weighted_3D MA_total_market_labs_3D) ///
	(ma_pop_3d ma_nighlight_3d ma_lnpop_3d ma_majormarket_3d ///
	ma_majormarket_3d_n ma_submarket_3d ma_submarket_3d_n ///
	ma_labs_amount_3d ma_totmarkets_3D ma_marketdummy_3d ma_totmarkets_3d_w ///
	ma_totmarketlabs_3d_w ma_totmarketlabs_3d)

* Travel-time columns towards five cities, copied from the column with the given destination id
local ma_cities    Kunduz Kandahar Hirat Mazari Jalalabad
local ma_city_ids  268    46       109   309    126
forvalues k = 1/5 {
	local city : word `k' of `ma_cities'
	local cid  : word `k' of `ma_city_ids'
	gen total_stime2_`city'=total_stime_2_`cid'
}

* Remove the pairwise terms MA_1_1 ... MA_28_398
forvalues i = 1/28 {
	drop MA_`i'_*
}

* Remove missing districtcode
drop if districtcode == .

save "processed\market_access.dta", replace



/*******************************************************************************
*                                Spatial Lags                                  *
*******************************************************************************/
*Create spatial lags

***Neighbor district conflict
* Read the list of (district, neighboring district) pairs and attach names and district codes
* to both sides of every pair.
import excel "district-neighbors_codes.xlsx", sheet("district-neighbors0912") firstrow clear
drop OBJECTID
rename (src_DIST_3 nbr_DIST_3 src_DISTID nbr_DISTID) (districtname neighborname distid neighborcode)
foreach namevar of varlist districtname neighborname {
	replace `namevar'=lower(`namevar')
}

* First pass ("main"): look up the source district via distid.
* Second pass ("neighbor"): the neighbor id takes the place of distid and the same lookups are repeated.
foreach role in main neighbor {
	if "`role'" == "neighbor" {
		rename neighborcode distid
	}

	merge m:1 distid using "processed\districts_fromshp398.dta" 
	drop _merge
	rename objectid OBJECTID
	destring OBJECTID, replace 
	merge m:1 OBJECTID using "processed\districtnames.dta" 
	drop OBJECTID prov_34_na dist_34_na provid distid _merge 

	rename (province district) (province_`role' district_`role')
	if "`role'" == "main" {
		* Only the main side gets a suffix; the neighbor's code stays in districtcode
		rename districtcode districtcode_main
	}
}

drop districtname neighborname NODE_COUNT


***Combine with other data to build spatial lags
* Every neighbor pair is copied 16 times and numbered 2000, 2001, ... within the pair. The neighbor's
* yearly values are then attached and averaged over all neighbors of a district.

expand 16

egen group=group(districtcode_main districtcode)
sort group
by group: gen year=1999+_n
sort districtcode_main districtcode year

* districtcode is still the neighbor's code here, so the merged values describe the neighbor
merge m:1 districtcode year using "processed\GLK_final_analysis_temp.dta", keep(master match) nogenerate
drop province district
rename districtcode districtcode_neighbor
rename (province_main districtcode_main district_main) (province districtcode district)

***Spatial lags

*** shock
* Prefix the neighbor's variables with n_ (the wildcard entry is resolved by rename itself)
foreach stub in  opiumshock_ln* lnwheat_shock lnbrd smallconflict lowconflict conflict war  {
	rename `stub' n_`stub'
}


keep province districtcode district province_neighbor districtcode_neighbor district_neighbor year n_*
* Mean over all neighbors of a district in a given year
collapse (mean) n_* (last) district province, by(districtcode year)


save "processed\spatiallags.dta", replace


* Reload the district-year file and attach the district-level market access measures
use "processed\GLK_final_analysis_temp.dta", clear


* Market Access
merge m:1 districtcode using "processed\market_access.dta"

* Number of districts before and after discarding rows that exist only in the market access file
unique districtcode

drop if _merge == 2
unique districtcode
drop _merge


*Generate distances to Kabul and other cities
* Each dummy equals 1 if the respective distance or travel time is below the cutoff and 0 otherwise;
* a missing distance or travel time yields 0.
foreach cutoff of numlist 50 75 100 125 {
	gen dist_d_Kabul_`cutoff' = (dist_d_Kabul < `cutoff')
}

foreach cutoff of numlist 50 75 100 125 {
	gen dist_anycity_`cutoff' = (dist_d_Kunduz < `cutoff' | dist_d_Kandahar < `cutoff' | dist_d_Hirat < `cutoff' ///
		| dist_d_Mazari_Sharif < `cutoff' | dist_d_Jalalabad < `cutoff')
}

forvalues cutoff = 1/3 {
	gen totaltime_`cutoff' = (total_stime_2 < `cutoff')
}

* NOTE(review): the total_stime2_<city> columns come from the centroid matrix, where zero travel times
* were set to missing. The district with the same id as a city therefore has a missing value for that
* city and gets 0 here unless another city is within the cutoff. Confirm that this is intended.
forvalues cutoff = 1/3 {
	gen totaltime_`cutoff'_anycity = (total_stime2_Kunduz < `cutoff' | total_stime2_Kandahar < `cutoff' ///
		| total_stime2_Hirat < `cutoff' | total_stime2_Mazari < `cutoff' | total_stime2_Jalalabad < `cutoff')
}
			
			
*Alternative IV: legal opioid prescription
* Level and log of prescription, each interacted with suitability_rw_opium
gen prescription_shock   = suitability_rw_opium*prescription
gen lnprescription       = ln(prescription)
gen lnprescription_shock = suitability_rw_opium*lnprescription

*Add spatial lags
* Attach the neighbor averages, drop 2015, missing years and rows that exist only in the
* spatial lag file, and declare the panel.
merge 1:1 districtcode year using "processed\spatiallags.dta"
drop if year==2015 | year==. | _merge==2
drop _merge
xtset districtcode year

* First, second and third lag of the listed variables, named l<var>, l2<var> and l3<var>
sort districtcode year
foreach X of varlist lnwheat_rw_shock wheat_rw_shock lnwheat_shock eradication conflict smallconflict lowconflict ///
	best_est lnbrd war lnrevenue revenue nightlight n_* vhi {
	forvalues k = 1/3 {
		* the first lag carries the prefix "l", higher lags "l2" and "l3"
		local prefix = cond(`k' == 1, "l", "l`k'")
		bysort districtcode: gen `prefix'`X'=l`k'.`X'
	}
}


sort districtcode year
xtset districtcode year


****Gen moving average for cultivation and revenue
* Two-period averages: current value and first lag, divided by two
xtset
gen mlncultivation = (l.lncultivation + lncultivation)/2
gen mcultivation   = (l.cultivation + cultivation)/2
* log of (1 + averaged cultivation), only filled for 2001 to 2014
gen lnmcultivation = ln(1 + mcultivation) if year>=2001 & year<=2014

gen mlnrevenue     = (l.lnrevenue + lnrevenue)/2


save "processed\GLK_final_analysis_temp1.dta", replace


***Time camps
* Districts listed in camps.dta, with their ids taken from the 2002 rows of the district-year file
use "processed\camps.dta", clear

merge 1:m districtcode using "processed\GLK_final_analysis_temp1.dta", keep(match) nogenerate
keep if year==2002 

keep districtcode OBJECTID_1 OBJECTID camps

save "processed\camps_geo.dta", replace 


***Distance Camps
* Keep the distance pairs whose districtcode appears in camps.dta and put them in wide format:
* one row per destcode, one dist_camps column per matched districtcode.
import delimited "linear_distance_pair.csv", clear

merge m:1 districtcode using "processed\camps.dta", keep(match) nogenerate

keep districtcode destcode linear_distance  
rename linear_distance dist_camps
reshape wide dist_camps , i(destcode) j(districtcode)

* The destination becomes the merge key of the saved file
rename destcode districtcode
save "processed\camps_geo1.dta", replace 




* Travel time (total_stime_2) from every origin listed in camps_geo.dta to every destination,
* in wide format: one row per destination, one time_camps column per matched origin.
* Zero travel times are left unchanged in this file.
import delimited using "Centroid_to_Centroid_Matrix.txt", clear

rename originid OBJECTID_1
label variable  OBJECTID_1 "District ID from ArcGis"

* Only origins that appear in camps_geo.dta are kept
merge m:1  OBJECTID_1  using "processed\camps_geo.dta", keep(match) nogenerate

* Of the measures in the matrix only total_stime_2 is used
keep OBJECTID_1 destinationid total_stime_2
rename total_stime_2 time_camps 

reshape wide time_camps , i(destinationid) j(OBJECTID_1)

* The destination id becomes the merge key of the saved file
rename destinationid OBJECTID_1

save "processed\camps_geo_time.dta", replace


* Attach the travel times and distances to the camp districts and build time_camp,
* the smallest travel time to any of them, plus three cutoff dummies.
use "processed\GLK_final_analysis_temp1.dta", clear

* Rows that exist only in the using files are not kept, as in the other merges of this do-file.
* They would have missing districtcode and year and would also pass conditions such as "year > 2001".
merge m:1 OBJECTID_1 using "processed\camps_geo_time.dta", keep(master match) nogenerate
merge m:1 districtcode using "processed\camps_geo1.dta", keep(master match) nogenerate

unique districtcode

* Smallest value across all time_camps columns; the columns themselves are dropped afterwards
egen time_camp = rowmin(time_camps*) 
drop time_camps* 

summarize time_camp if year > 2001


* time_camp_x equals 1 if time_camp is below x and 0 otherwise (a missing time_camp yields 0).
* NOTE(review): "cap gen" hides the error when time_camp_x already exists; in that case existing
* values are kept and only set to 1 where the condition holds. Confirm that this is intended.
forvalues x= 1(1)3{
	cap gen time_camp_`x'=0
	replace time_camp_`x'=1 if time_camp<`x'
	
}

summarize time_camp_1 time_camp_2 time_camp_3 if year > 2001


				
*generate categorical variables for territorial control
* For each of three indicators the variable groupcontrol_<suffix> is
*   "Gov"       if time_camp_2 equals 1,
*   "Taliban"   if the indicator equals 1 and time_camp_2 does not equal 1,
*   "Contested" in all other cases.
local gc_suffixes    ethno          taliban1996  greg
local gc_indicators  pashtun_ethno  taliban1996  pashtun_greg

forvalues k = 1/3 {
	local suffix    : word `k' of `gc_suffixes'
	local indicator : word `k' of `gc_indicators'

	* "." is a temporary placeholder for observations that are not classified yet
	gen groupcontrol_`suffix'="."
	replace groupcontrol_`suffix'="Taliban" if `indicator'==1 & time_camp_2!=1
	replace groupcontrol_`suffix'="Gov" if time_camp_2==1
	replace groupcontrol_`suffix'="Contested" if groupcontrol_`suffix'=="."
}



*prepare some sigacts variables
* Events without casualties
gen nocasualty_b3event=total_b3event-casualty_b3event

* Shares of total_b3event in percent. The share is missing when total_b3event is zero;
* the _nonmiss version is set to 0 in that case.
local share_tags  IED            DF
local share_vars  ied_explosion  df
forvalues k = 1/2 {
	local tag : word `k' of `share_tags'
	local src : word `k' of `share_vars'

	gen `tag'_share=`src'/total_b3event*100
	gen `tag'_share_nonmiss=`tag'_share
	replace `tag'_share_nonmiss=0 if total_b3event==0
}

* Final checks on the panel settings and identifiers
xtset

tab districtcode, missing
tab year, missing


		 
save "processed\GLK_final_analysis_external.dta", replace
